diff --git a/.prettierrc.json b/.prettierrc.json index 2f8653e..0653844 100644 --- a/.prettierrc.json +++ b/.prettierrc.json @@ -1,9 +1,11 @@ { - "semi": true, - "trailingComma": "es5", + "printWidth": 100, "singleQuote": true, - "printWidth": 80, - "tabWidth": 2, + "trailingComma": "all", + "quoteProps": "preserve", + "bracketSpacing": false, + "arrowParens": "avoid", + "embeddedLanguageFormatting": "off", "overrides": [ { "files": "*.html", diff --git a/report-app/report-server.ts b/report-app/report-server.ts index b61efa5..e6878d8 100644 --- a/report-app/report-server.ts +++ b/report-app/report-server.ts @@ -5,14 +5,11 @@ import { writeResponseToNodeResponse, } from '@angular/ssr/node'; import express from 'express'; -import { dirname, isAbsolute, join, resolve } from 'node:path'; -import { fileURLToPath } from 'node:url'; -import { - FetchedLocalReports, - fetchReportsFromDisk, -} from '../runner/reporting/report-local-disk'; -import { RunInfo } from '../runner/shared-interfaces'; -import { convertV2ReportToV3Report } from '../runner/reporting/migrations/v2_to_v3'; +import {dirname, isAbsolute, join, resolve} from 'node:path'; +import {fileURLToPath} from 'node:url'; +import {FetchedLocalReports, fetchReportsFromDisk} from '../runner/reporting/report-local-disk'; +import {RunInfo} from '../runner/shared-interfaces'; +import {convertV2ReportToV3Report} from '../runner/reporting/migrations/v2_to_v3'; const app = express(); const reportsLoader = await getReportLoader(); @@ -50,7 +47,7 @@ app.get('/api/reports/:id', async (req, res) => { } // Convert potential older v2 reports. - result = result.map((r) => convertV2ReportToV3Report(r)); + result = result.map(r => convertV2ReportToV3Report(r)); res.json(result); }); @@ -60,13 +57,13 @@ app.use( maxAge: '1y', index: false, redirect: false, - }) + }), ); app.use('/**', (req, res, next) => { angularApp .handle(req) - .then((response) => { + .then(response => { return response ? writeResponseToNodeResponse(response, res) : next(); }) .catch(next); @@ -85,7 +82,7 @@ export const reqHandler = createNodeRequestHandler(app); interface ReportLoader { getGroupedReports: (groupId: string) => Promise; - getGroupsList: () => Promise<{ id: string }[]>; + getGroupsList: () => Promise<{id: string}[]>; configureEndpoints?: (expressApp: typeof app) => Promise; } @@ -93,14 +90,11 @@ interface ReportLoader { function getOptions() { const defaultPort = 4200; const envPort = process.env['CODEGEN_REPORTS_PORT']; - const reportsRoot = - process.env['CODEGEN_REPORTS_DIR'] || './.web-codegen-scorer/reports'; + const reportsRoot = process.env['CODEGEN_REPORTS_DIR'] || './.web-codegen-scorer/reports'; return { port: envPort ? parseInt(envPort) || defaultPort : defaultPort, - reportsRoot: isAbsolute(reportsRoot) - ? reportsRoot - : join(process.cwd(), reportsRoot), + reportsRoot: isAbsolute(reportsRoot) ? reportsRoot : join(process.cwd(), reportsRoot), }; } @@ -118,9 +112,7 @@ async function getReportLoader() { const loaderImportPath = isAbsolute(reportLoaderPath) ? reportLoaderPath : join(process.cwd(), reportLoaderPath); - const importResult: { default: ReportLoader } = await import( - /* @vite-ignore */ loaderImportPath - ); + const importResult: {default: ReportLoader} = await import(/* @vite-ignore */ loaderImportPath); if ( !importResult.default || @@ -129,7 +121,7 @@ async function getReportLoader() { ) { throw new Error( 'Invalid remote import loader. The file must have a default export ' + - 'with `getGroupedReports` and `getGroupsList` functions.' + 'with `getGroupedReports` and `getGroupsList` functions.', ); } @@ -140,7 +132,7 @@ async function resolveLocalData(directory: string) { // Reuse the same promise so that concurrent requests get the same response. if (!localDataPromise) { let resolveFn: (data: FetchedLocalReports) => void; - localDataPromise = new Promise((resolve) => (resolveFn = resolve)); + localDataPromise = new Promise(resolve => (resolveFn = resolve)); resolveFn!(await fetchReportsFromDisk(directory)); } diff --git a/report-app/src/app/app.config.server.ts b/report-app/src/app/app.config.server.ts index f594352..89098f5 100644 --- a/report-app/src/app/app.config.server.ts +++ b/report-app/src/app/app.config.server.ts @@ -1,6 +1,6 @@ -import { provideServerRendering, RenderMode, withRoutes } from '@angular/ssr'; -import { mergeApplicationConfig, ApplicationConfig } from '@angular/core'; -import { appConfig } from './app.config'; +import {provideServerRendering, RenderMode, withRoutes} from '@angular/ssr'; +import {mergeApplicationConfig, ApplicationConfig} from '@angular/core'; +import {appConfig} from './app.config'; const ssrAppConfig: ApplicationConfig = { providers: [ @@ -11,7 +11,7 @@ const ssrAppConfig: ApplicationConfig = { path: '**', renderMode: RenderMode.Server, }, - ]) + ]), ), ], }; diff --git a/report-app/src/app/app.config.ts b/report-app/src/app/app.config.ts index 223a9e9..bc58ed4 100644 --- a/report-app/src/app/app.config.ts +++ b/report-app/src/app/app.config.ts @@ -5,20 +5,16 @@ import { provideBrowserGlobalErrorListeners, provideZoneChangeDetection, } from '@angular/core'; -import { - provideRouter, - withComponentInputBinding, - withViewTransitions, -} from '@angular/router'; +import {provideRouter, withComponentInputBinding, withViewTransitions} from '@angular/router'; -import { routes } from './app.routes'; -import { provideHttpClient, withFetch } from '@angular/common/http'; -import { GoogleChartsLoader } from './services/google-charts-loader'; +import {routes} from './app.routes'; +import {provideHttpClient, withFetch} from '@angular/common/http'; +import {GoogleChartsLoader} from './services/google-charts-loader'; export const appConfig: ApplicationConfig = { providers: [ provideBrowserGlobalErrorListeners(), - provideZoneChangeDetection({ eventCoalescing: true }), + provideZoneChangeDetection({eventCoalescing: true}), provideRouter(routes, withComponentInputBinding(), withViewTransitions()), provideHttpClient(withFetch()), provideAppInitializer(() => inject(GoogleChartsLoader).initialize()), diff --git a/report-app/src/app/app.routes.ts b/report-app/src/app/app.routes.ts index c33e8b3..cb60796 100644 --- a/report-app/src/app/app.routes.ts +++ b/report-app/src/app/app.routes.ts @@ -1,7 +1,7 @@ -import { Routes } from '@angular/router'; -import { ReportViewer } from './pages/report-viewer/report-viewer'; -import { ComparisonPage } from './pages/comparison/comparison'; -import { ReportListComponent } from './pages/report-list/report-list'; +import {Routes} from '@angular/router'; +import {ReportViewer} from './pages/report-viewer/report-viewer'; +import {ComparisonPage} from './pages/comparison/comparison'; +import {ReportListComponent} from './pages/report-list/report-list'; export const routes: Routes = [ { diff --git a/report-app/src/app/app.spec.ts b/report-app/src/app/app.spec.ts index 727035e..a744a1f 100644 --- a/report-app/src/app/app.spec.ts +++ b/report-app/src/app/app.spec.ts @@ -1,5 +1,5 @@ -import { TestBed } from '@angular/core/testing'; -import { App } from './app'; +import {TestBed} from '@angular/core/testing'; +import {App} from './app'; describe('App', () => { beforeEach(async () => { @@ -18,8 +18,6 @@ describe('App', () => { const fixture = TestBed.createComponent(App); fixture.detectChanges(); const compiled = fixture.nativeElement as HTMLElement; - expect(compiled.querySelector('h1')?.textContent).toContain( - 'Hello, report-app' - ); + expect(compiled.querySelector('h1')?.textContent).toContain('Hello, report-app'); }); }); diff --git a/report-app/src/app/app.ts b/report-app/src/app/app.ts index ef754c7..35f2b50 100644 --- a/report-app/src/app/app.ts +++ b/report-app/src/app/app.ts @@ -1,8 +1,8 @@ -import { Component, inject, PLATFORM_ID } from '@angular/core'; -import { RouterLink, RouterLinkActive, RouterOutlet } from '@angular/router'; -import { ReportsFetcher } from './services/reports-fetcher'; -import { isPlatformServer } from '@angular/common'; -import { AppColorMode } from './services/app-color-mode'; +import {Component, inject, PLATFORM_ID} from '@angular/core'; +import {RouterLink, RouterLinkActive, RouterOutlet} from '@angular/router'; +import {ReportsFetcher} from './services/reports-fetcher'; +import {isPlatformServer} from '@angular/common'; +import {AppColorMode} from './services/app-color-mode'; @Component({ selector: 'app-root', @@ -21,8 +21,6 @@ export class App { protected groupsError = this.reportsFetcher.reportGroupsError; protected toggleColorMode() { - this.colorModeService.setColorMode( - this.colorMode() === 'light' ? 'dark' : 'light' - ); + this.colorModeService.setColorMode(this.colorMode() === 'light' ? 'dark' : 'light'); } } diff --git a/report-app/src/app/pages/comparison/comparison.ts b/report-app/src/app/pages/comparison/comparison.ts index 8148faf..961bd3a 100644 --- a/report-app/src/app/pages/comparison/comparison.ts +++ b/report-app/src/app/pages/comparison/comparison.ts @@ -1,13 +1,13 @@ -import { Component, computed, inject, linkedSignal } from '@angular/core'; -import { toSignal } from '@angular/core/rxjs-interop'; -import { map } from 'rxjs'; -import { ComparisonScoreDistribution } from '../../shared/comparison/comparison-score-distribution'; -import { ComparisonBuildDistribution } from '../../shared/comparison/comparison-build-distribution'; -import { ModelComparisonData } from '../../shared/comparison/comparison-data'; -import { ReportsFetcher } from '../../services/reports-fetcher'; -import { ReportSelect } from '../../shared/report-select/report-select'; -import { ComparisonRuntimeDistribution } from '../../shared/comparison/comparison-runtime-distribution'; -import { ActivatedRoute } from '@angular/router'; +import {Component, computed, inject, linkedSignal} from '@angular/core'; +import {toSignal} from '@angular/core/rxjs-interop'; +import {map} from 'rxjs'; +import {ComparisonScoreDistribution} from '../../shared/comparison/comparison-score-distribution'; +import {ComparisonBuildDistribution} from '../../shared/comparison/comparison-build-distribution'; +import {ModelComparisonData} from '../../shared/comparison/comparison-data'; +import {ReportsFetcher} from '../../services/reports-fetcher'; +import {ReportSelect} from '../../shared/report-select/report-select'; +import {ComparisonRuntimeDistribution} from '../../shared/comparison/comparison-runtime-distribution'; +import {ActivatedRoute} from '@angular/router'; @Component({ templateUrl: './comparison.html', @@ -31,10 +31,10 @@ export class ComparisonPage { }), computation: () => { const allGroups = this.groups(); - const results: { reportName: string; groupId: string | null }[] = []; + const results: {reportName: string; groupId: string | null}[] = []; - this.selectedGroups().forEach((id) => { - const correspondingGroup = allGroups.find((group) => group.id === id); + this.selectedGroups().forEach(id => { + const correspondingGroup = allGroups.find(group => group.id === id); if (correspondingGroup) { results.push({ @@ -50,22 +50,22 @@ export class ComparisonPage { readonly selectedGroups = toSignal( this.route.queryParams.pipe( - map((params) => { + map(params => { const ids = params['groups']; return ids && Array.isArray(ids) ? ids : []; - }) + }), ), - { requireSync: true } + {requireSync: true}, ); readonly comparisonModelData = computed(() => { const allGroups = this.groups(); const selectedGroups = this.groupsToCompare() - .map((g) => ({ + .map(g => ({ reportName: g.reportName, - group: allGroups.find((current) => current.id === g.groupId)!, + group: allGroups.find(current => current.id === g.groupId)!, })) - .filter((g) => !!g.group); + .filter(g => !!g.group); if (selectedGroups.length < 2) { return null; @@ -73,11 +73,10 @@ export class ComparisonPage { return { averageAppsCount: Math.floor( - selectedGroups.reduce((acc, r) => r.group.appsCount + acc, 0) / - selectedGroups.length + selectedGroups.reduce((acc, r) => r.group.appsCount + acc, 0) / selectedGroups.length, ), series: [ - ...selectedGroups.map((r) => ({ + ...selectedGroups.map(r => ({ name: r.reportName, stats: r.group.stats, appsCount: r.group.appsCount, @@ -86,7 +85,7 @@ export class ComparisonPage { } satisfies ModelComparisonData; }); - protected updateReportName(report: { reportName: string }, newName: string) { + protected updateReportName(report: {reportName: string}, newName: string) { report.reportName = newName; this.groupsToCompare.set([...this.groupsToCompare()]); } @@ -94,7 +93,7 @@ export class ComparisonPage { protected setSelectedGroup(index: number, groupId: string | undefined) { const allGroups = this.groups(); const current = this.groupsToCompare(); - const correspondingGroup = allGroups.find((group) => group.id === groupId); + const correspondingGroup = allGroups.find(group => group.id === groupId); if (correspondingGroup) { current[index] = { diff --git a/report-app/src/app/pages/report-list/report-list.ts b/report-app/src/app/pages/report-list/report-list.ts index 2cefefe..7f2b75b 100644 --- a/report-app/src/app/pages/report-list/report-list.ts +++ b/report-app/src/app/pages/report-list/report-list.ts @@ -1,33 +1,20 @@ -import { - Component, - computed, - inject, - PLATFORM_ID, - signal, -} from '@angular/core'; -import { Router, RouterLink } from '@angular/router'; -import { ReportsFetcher } from '../../services/reports-fetcher'; -import { DatePipe, isPlatformServer } from '@angular/common'; -import { ScoreBucket, RunGroup } from '../../../../../runner/shared-interfaces'; +import {Component, computed, inject, PLATFORM_ID, signal} from '@angular/core'; +import {Router, RouterLink} from '@angular/router'; +import {ReportsFetcher} from '../../services/reports-fetcher'; +import {DatePipe, isPlatformServer} from '@angular/common'; +import {ScoreBucket, RunGroup} from '../../../../../runner/shared-interfaces'; import { StackedBarChart, StackedBarChartData, } from '../../shared/visualization/stacked-bar-chart/stacked-bar-chart'; -import { MessageSpinner } from '../../shared/message-spinner'; -import { Score } from '../../shared/score/score'; -import { ProviderLabel } from '../../shared/provider-label'; -import { bucketToScoreVariable } from '../../shared/scoring'; +import {MessageSpinner} from '../../shared/message-spinner'; +import {Score} from '../../shared/score/score'; +import {ProviderLabel} from '../../shared/provider-label'; +import {bucketToScoreVariable} from '../../shared/scoring'; @Component({ selector: 'app-report-list', - imports: [ - RouterLink, - DatePipe, - StackedBarChart, - MessageSpinner, - Score, - ProviderLabel, - ], + imports: [RouterLink, DatePipe, StackedBarChart, MessageSpinner, Score, ProviderLabel], templateUrl: './report-list.html', styleUrls: ['./report-list.scss'], }) @@ -46,7 +33,7 @@ export class ReportListComponent { protected allFrameworks = computed(() => { const frameworks = new Map(); - this.allGroups().forEach((group) => { + this.allGroups().forEach(group => { const framework = group.framework.fullStackFramework; frameworks.set(framework.id, framework.displayName); }); @@ -57,9 +44,9 @@ export class ReportListComponent { }); protected allModels = computed(() => { - const models = new Set(this.allGroups().map((g) => g.model)); + const models = new Set(this.allGroups().map(g => g.model)); - return Array.from(models).map((model) => ({ + return Array.from(models).map(model => ({ id: model, displayName: model, })); @@ -68,7 +55,7 @@ export class ReportListComponent { protected allRunners = computed(() => { const runners = new Map(); - this.allGroups().forEach((group) => { + this.allGroups().forEach(group => { if (group.runner) { runners.set(group.runner.id, group.runner.displayName); } @@ -86,9 +73,8 @@ export class ReportListComponent { const runner = this.selectedRunner(); const groups = this.allGroups(); - return groups.filter((group) => { - const frameworkMatch = - !framework || group.framework.fullStackFramework.id === framework; + return groups.filter(group => { + const frameworkMatch = !framework || group.framework.fullStackFramework.id === framework; const modelMatch = !model || group.model === model; const runnerMatch = !runner || group.runner?.id === runner; return frameworkMatch && modelMatch && runnerMatch; @@ -106,7 +92,7 @@ export class ReportListComponent { } protected toggleCompareMode(): void { - this.isCompareMode.update((value) => !value); + this.isCompareMode.update(value => !value); if (!this.isCompareMode()) { this.reportsToCompare.set([]); } @@ -115,11 +101,9 @@ export class ReportListComponent { protected onCheckboxChange(event: Event, id: string) { const checkbox = event.target as HTMLInputElement; if (checkbox.checked) { - this.reportsToCompare.update((reports) => [...reports, id]); + this.reportsToCompare.update(reports => [...reports, id]); } else { - this.reportsToCompare.update((reports) => - reports.filter((r) => r !== id) - ); + this.reportsToCompare.update(reports => reports.filter(r => r !== id)); } } @@ -128,7 +112,7 @@ export class ReportListComponent { } protected removeReportFromComparison(id: string) { - this.reportsToCompare.update((reports) => reports.filter((r) => r !== id)); + this.reportsToCompare.update(reports => reports.filter(r => r !== id)); } protected navigateToComparison() { diff --git a/report-app/src/app/pages/report-viewer/failed-checks-filter.ts b/report-app/src/app/pages/report-viewer/failed-checks-filter.ts index 355185e..f8b1fba 100644 --- a/report-app/src/app/pages/report-viewer/failed-checks-filter.ts +++ b/report-app/src/app/pages/report-viewer/failed-checks-filter.ts @@ -1,4 +1,4 @@ -import { Component, output, input } from '@angular/core'; +import {Component, output, input} from '@angular/core'; @Component({ selector: 'failed-checks-filter', @@ -28,7 +28,7 @@ import { Component, output, input } from '@angular/core'; `, }) export class FailedChecksFilter { - allFailedChecks = input.required<{ name: string; count: number }[]>(); + allFailedChecks = input.required<{name: string; count: number}[]>(); selectedChecks = input.required>(); toggleCheck = output(); } diff --git a/report-app/src/app/pages/report-viewer/formatter.ts b/report-app/src/app/pages/report-viewer/formatter.ts index 226ab12..eeb4a29 100644 --- a/report-app/src/app/pages/report-viewer/formatter.ts +++ b/report-app/src/app/pages/report-viewer/formatter.ts @@ -4,17 +4,14 @@ import * as postcssPlugin from 'prettier/plugins/postcss.js'; import * as estreePlugin from 'prettier/plugins/estree.js'; import * as htmlPlugin from 'prettier/plugins/html.js'; -import { - LlmResponseFile, - RunSummary, -} from '../../../../../runner/shared-interfaces'; +import {LlmResponseFile, RunSummary} from '../../../../../runner/shared-interfaces'; export async function formatFile( file: LlmResponseFile, - framework: RunSummary['framework'] -): Promise { + framework: RunSummary['framework'], +): Promise { // We need to lazy-load Prettier to avoid warnings during SSR. - const format = await import('prettier').then((m) => m.format); + const format = await import('prettier').then(m => m.format); let parser: import('prettier').BuiltInParserName; if (file.filePath.endsWith('.html')) { if (framework?.fullStackFramework.id === 'angular') { @@ -28,7 +25,7 @@ export async function formatFile( parser = 'css'; } else { console.error('No parser for file path:', file.filePath); - return { error: `No parser found for ${file.filePath}.` }; + return {error: `No parser found for ${file.filePath}.`}; } try { @@ -47,6 +44,6 @@ export async function formatFile( }); return result; } catch (e) { - return { error: `Could not format: ${e}` }; + return {error: `Could not format: ${e}`}; } } diff --git a/report-app/src/app/pages/report-viewer/report-viewer.ts b/report-app/src/app/pages/report-viewer/report-viewer.ts index 40ccc8b..42dd379 100644 --- a/report-app/src/app/pages/report-viewer/report-viewer.ts +++ b/report-app/src/app/pages/report-viewer/report-viewer.ts @@ -1,5 +1,5 @@ -import { Clipboard } from '@angular/cdk/clipboard'; -import { DatePipe, DecimalPipe } from '@angular/common'; +import {Clipboard} from '@angular/cdk/clipboard'; +import {DatePipe, DecimalPipe} from '@angular/common'; import { afterNextRender, Component, @@ -11,7 +11,7 @@ import { signal, viewChild, } from '@angular/core'; -import { NgxJsonViewerModule } from 'ngx-json-viewer'; +import {NgxJsonViewerModule} from 'ngx-json-viewer'; import { BuildErrorType, BuildResultStatus, @@ -27,25 +27,21 @@ import { ScoreBucket, SkippedIndividualAssessment, } from '../../../../../runner/shared-interfaces'; -import { CodeViewer } from '../../shared/code-viewer'; -import { ReportsFetcher } from '../../services/reports-fetcher'; +import {CodeViewer} from '../../shared/code-viewer'; +import {ReportsFetcher} from '../../services/reports-fetcher'; import { StackedBarChart, StackedBarChartData, } from '../../shared/visualization/stacked-bar-chart/stacked-bar-chart'; -import { formatFile } from './formatter'; -import { FailedChecksFilter } from './failed-checks-filter'; -import { MessageSpinner } from '../../shared/message-spinner'; -import { createPromptDebuggingZip } from '../../shared/debugging-zip'; -import { Score } from '../../shared/score/score'; -import { - bucketToScoreVariable, - formatScore, - ScoreCssVariable, -} from '../../shared/scoring'; -import { ExpansionPanel } from '../../shared/expansion-panel/expansion-panel'; -import { ExpansionPanelHeader } from '../../shared/expansion-panel/expansion-panel-header'; -import { ProviderLabel } from '../../shared/provider-label'; +import {formatFile} from './formatter'; +import {FailedChecksFilter} from './failed-checks-filter'; +import {MessageSpinner} from '../../shared/message-spinner'; +import {createPromptDebuggingZip} from '../../shared/debugging-zip'; +import {Score} from '../../shared/score/score'; +import {bucketToScoreVariable, formatScore, ScoreCssVariable} from '../../shared/scoring'; +import {ExpansionPanel} from '../../shared/expansion-panel/expansion-panel'; +import {ExpansionPanelHeader} from '../../shared/expansion-panel/expansion-panel-header'; +import {ProviderLabel} from '../../shared/provider-label'; const localReportRegex = /-l\d+$/; @@ -79,15 +75,14 @@ export class ReportViewer { } // Set by the router component input bindings. - protected reportGroupId = input.required({ alias: 'id' }); + protected reportGroupId = input.required({alias: 'id'}); protected formatted = signal>(new Map()); protected formatScore = formatScore; protected error = computed(() => this.selectedReport.error()); private selectedReport = resource({ - params: () => ({ groupId: this.reportGroupId() }), - loader: ({ params }) => - this.reportsFetcher.getCombinedReport(params.groupId), + params: () => ({groupId: this.reportGroupId()}), + loader: ({params}) => this.reportsFetcher.getCombinedReport(params.groupId), }); protected selectedReportWithSortedResults = computed(() => { @@ -99,15 +94,13 @@ export class ReportViewer { id: report.id, group: report.group, details: report.details, - results: [...report.results].sort((a, b) => - a.promptDef.name.localeCompare(b.promptDef.name) - ), + results: [...report.results].sort((a, b) => a.promptDef.name.localeCompare(b.promptDef.name)), }; }); protected overview = computed(() => { const id = this.reportGroupId(); - return this.reportsFetcher.reportGroups().find((group) => group.id === id); + return this.reportsFetcher.reportGroups().find(group => group.id === id); }); protected selectedChecks = signal>(new Set()); @@ -133,20 +126,15 @@ export class ReportViewer { } } for (const checkName of failedChecksInApp) { - failedChecksMap.set( - checkName, - (failedChecksMap.get(checkName) || 0) + 1 - ); + failedChecksMap.set(checkName, (failedChecksMap.get(checkName) || 0) + 1); } } } - const failedChecksArray = Array.from(failedChecksMap.entries()).map( - ([name, count]) => ({ - name, - count, - }) - ); + const failedChecksArray = Array.from(failedChecksMap.entries()).map(([name, count]) => ({ + name, + count, + })); return failedChecksArray.sort((a, b) => a.name.localeCompare(b.name)); }); @@ -163,7 +151,7 @@ export class ReportViewer { return report.results; } - return report.results.filter((result) => { + return report.results.filter(result => { if (result.score.totalPoints === result.score.maxOverallPoints) { return false; } @@ -187,14 +175,8 @@ export class ReportViewer { return null; } - const initialFailures: Record< - string, - { testCase: string; message: string }[] - > = {}; - const repairFailures: Record< - string, - { testCase: string; message: string }[] - > = {}; + const initialFailures: Record = {}; + const repairFailures: Record = {}; for (const result of report.results) { const initialAttempt = result.attemptDetails[0]; @@ -226,12 +208,8 @@ export class ReportViewer { } } - const hasInitialFailures = Object.values(initialFailures).some( - (arr) => arr.length > 0 - ); - const hasRepairFailures = Object.values(repairFailures).some( - (arr) => arr.length > 0 - ); + const hasInitialFailures = Object.values(initialFailures).some(arr => arr.length > 0); + const hasRepairFailures = Object.values(repairFailures).some(arr => arr.length > 0); return { initialFailures: Object.entries(initialFailures), @@ -287,7 +265,7 @@ export class ReportViewer { } protected checksAsGraphData(buckets: ScoreBucket[]): StackedBarChartData { - return buckets.map((b) => ({ + return buckets.map(b => ({ label: b.nameWithLabels, color: bucketToScoreVariable(b), value: b.appsCount, @@ -309,10 +287,7 @@ export class ReportViewer { ]; } - protected securityStatsAsGraphData(stats: { - appsWithErrors: number; - appsWithoutErrors: number; - }) { + protected securityStatsAsGraphData(stats: {appsWithErrors: number; appsWithoutErrors: number}) { return [ { label: 'No exceptions', @@ -369,7 +344,7 @@ export class ReportViewer { } protected isSkippedAssessment( - value: IndividualAssessment | SkippedIndividualAssessment + value: IndividualAssessment | SkippedIndividualAssessment, ): value is SkippedIndividualAssessment { return value.state === IndividualAssessmentState.SKIPPED; } @@ -378,16 +353,13 @@ export class ReportViewer { protected closeDropdownIfOpen(event: MouseEvent): void { const detailsElement = this.dropdownRef()?.nativeElement; - if ( - detailsElement?.hasAttribute('open') && - !detailsElement.contains(event.target) - ) { + if (detailsElement?.hasAttribute('open') && !detailsElement.contains(event.target)) { detailsElement.removeAttribute('open'); } } protected toggleCheckFilter(check: string): void { - this.selectedChecks.update((currentChecks) => { + this.selectedChecks.update(currentChecks => { const checks = new Set(currentChecks); if (checks.has(check)) { checks.delete(check); @@ -399,12 +371,9 @@ export class ReportViewer { } protected async format(file: LlmResponseFile): Promise { - const result = await formatFile( - file, - this.selectedReport.value()!.details.summary.framework - ); + const result = await formatFile(file, this.selectedReport.value()!.details.summary.framework); if (typeof result === 'string') { - this.formatted.update((oldMap) => { + this.formatted.update(oldMap => { const newMap = new Map(oldMap); newMap.set(file, result); return newMap; @@ -422,10 +391,7 @@ export class ReportViewer { * @param app The assessment result for which to create the debugging zip. */ protected async downloadDebuggingZip(app: AssessmentResult): Promise { - const blob = await createPromptDebuggingZip( - this.selectedReport.value()!, - app - ); + const blob = await createPromptDebuggingZip(this.selectedReport.value()!, app); const link = document.createElement('a'); link.href = window.URL.createObjectURL(blob); @@ -445,10 +411,7 @@ export class ReportViewer { } } - protected getDebugCommand( - report: RunInfo, - result: AssessmentResult - ): string | null { + protected getDebugCommand(report: RunInfo, result: AssessmentResult): string | null { // Only show the command for local reports. if (!localReportRegex.test(report.group)) { return null; diff --git a/report-app/src/app/services/app-color-mode.ts b/report-app/src/app/services/app-color-mode.ts index 0040102..00d347b 100644 --- a/report-app/src/app/services/app-color-mode.ts +++ b/report-app/src/app/services/app-color-mode.ts @@ -1,11 +1,11 @@ -import { isPlatformBrowser } from '@angular/common'; -import { inject, Injectable, PLATFORM_ID, signal } from '@angular/core'; +import {isPlatformBrowser} from '@angular/common'; +import {inject, Injectable, PLATFORM_ID, signal} from '@angular/core'; const colorModeStorageKey = 'wcs-color-mode'; export type ColorMode = 'light' | 'dark'; -@Injectable({ providedIn: 'root' }) +@Injectable({providedIn: 'root'}) export class AppColorMode { private currentColorMode = signal('light'); readonly colorMode = this.currentColorMode.asReadonly(); @@ -16,15 +16,11 @@ export class AppColorMode { try { // In some cases accessing localStorage can throw. - colorMode = localStorage.getItem( - colorModeStorageKey - ) as ColorMode | null; + colorMode = localStorage.getItem(colorModeStorageKey) as ColorMode | null; } catch {} if (!colorMode) { - colorMode = matchMedia('(prefers-color-scheme: dark)') - ? 'dark' - : 'light'; + colorMode = matchMedia('(prefers-color-scheme: dark)') ? 'dark' : 'light'; } this.setColorMode(colorMode); diff --git a/report-app/src/app/services/app-resize-notifier.ts b/report-app/src/app/services/app-resize-notifier.ts index 5ad65ad..4c2a068 100644 --- a/report-app/src/app/services/app-resize-notifier.ts +++ b/report-app/src/app/services/app-resize-notifier.ts @@ -1,18 +1,12 @@ -import { isPlatformBrowser } from '@angular/common'; -import { - ApplicationRef, - DestroyRef, - inject, - Injectable, - PLATFORM_ID, -} from '@angular/core'; +import {isPlatformBrowser} from '@angular/common'; +import {ApplicationRef, DestroyRef, inject, Injectable, PLATFORM_ID} from '@angular/core'; /** * This service will be init at the first injection. * * It can be used after the root component is created. */ -@Injectable({ providedIn: 'root' }) +@Injectable({providedIn: 'root'}) export class AppResizeNotifier { constructor() { if (this.isBrowser) { @@ -24,7 +18,7 @@ export class AppResizeNotifier { private _listeners: Array<() => void> = []; notify() { - this._listeners.forEach((l) => l()); + this._listeners.forEach(l => l()); } register(listener: () => void) { diff --git a/report-app/src/app/services/code-highligher.ts b/report-app/src/app/services/code-highligher.ts index 02631a0..eed560b 100644 --- a/report-app/src/app/services/code-highligher.ts +++ b/report-app/src/app/services/code-highligher.ts @@ -1,15 +1,11 @@ -import { Injectable, OnDestroy } from '@angular/core'; -import { - CodeToHastOptions, - createHighlighterCoreSync, - HighlighterCore, -} from 'shiki/core'; -import { createOnigurumaEngine } from 'shiki/engine/oniguruma'; +import {Injectable, OnDestroy} from '@angular/core'; +import {CodeToHastOptions, createHighlighterCoreSync, HighlighterCore} from 'shiki/core'; +import {createOnigurumaEngine} from 'shiki/engine/oniguruma'; import angularTs from '@shikijs/langs/angular-ts'; import githubLight from '@shikijs/themes/github-light'; import githubDark from '@shikijs/themes/github-dark'; -@Injectable({ providedIn: 'root' }) +@Injectable({providedIn: 'root'}) export class CodeHighligher implements OnDestroy { private cachedHighligher: HighlighterCore | undefined; diff --git a/report-app/src/app/services/google-charts-loader.ts b/report-app/src/app/services/google-charts-loader.ts index 16c65a9..201b426 100644 --- a/report-app/src/app/services/google-charts-loader.ts +++ b/report-app/src/app/services/google-charts-loader.ts @@ -1,9 +1,9 @@ /// -import { isPlatformBrowser } from '@angular/common'; -import { inject, Injectable, PLATFORM_ID } from '@angular/core'; +import {isPlatformBrowser} from '@angular/common'; +import {inject, Injectable, PLATFORM_ID} from '@angular/core'; -@Injectable({ providedIn: 'root' }) +@Injectable({providedIn: 'root'}) export class GoogleChartsLoader { private _resolveReadyPromise: (() => void) | null = null; private platformId = inject(PLATFORM_ID); @@ -12,7 +12,7 @@ export class GoogleChartsLoader { ready: Promise; constructor() { - this.ready = new Promise((resolve) => { + this.ready = new Promise(resolve => { this._resolveReadyPromise = resolve; }); } @@ -21,7 +21,7 @@ export class GoogleChartsLoader { initialize() { if (isPlatformBrowser(this.platformId)) { // Load the Visualization API and the corechart package. - google.charts.load('current', { packages: ['corechart'] }); + google.charts.load('current', {packages: ['corechart']}); // Set a callback to run when the Google Visualization API is loaded. google.charts.setOnLoadCallback(() => { diff --git a/report-app/src/app/services/reports-fetcher.ts b/report-app/src/app/services/reports-fetcher.ts index d4c3b79..6048bf7 100644 --- a/report-app/src/app/services/reports-fetcher.ts +++ b/report-app/src/app/services/reports-fetcher.ts @@ -1,15 +1,8 @@ -import { - computed, - inject, - Injectable, - PLATFORM_ID, - resource, - signal, -} from '@angular/core'; -import { RunGroup, RunInfo } from '../../../../runner/shared-interfaces'; -import { isPlatformBrowser } from '@angular/common'; - -@Injectable({ providedIn: 'root' }) +import {computed, inject, Injectable, PLATFORM_ID, resource, signal} from '@angular/core'; +import {RunGroup, RunInfo} from '../../../../runner/shared-interfaces'; +import {isPlatformBrowser} from '@angular/common'; + +@Injectable({providedIn: 'root'}) export class ReportsFetcher { private readonly platformId = inject(PLATFORM_ID); private readonly pendingFetches = signal(0); @@ -29,8 +22,7 @@ export class ReportsFetcher { const groups = (await response.json()) as RunGroup[]; return groups.sort( - (a, b) => - new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime() + (a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime(), ); }, }); @@ -42,13 +34,11 @@ export class ReportsFetcher { readonly reportGroupsError = computed(() => this.groupsResource.error()); readonly isLoadingSingleReport = computed(() => this.pendingFetches() > 0); - readonly isLoadingReportsList = computed(() => - this.groupsResource.isLoading() - ); + readonly isLoadingReportsList = computed(() => this.groupsResource.isLoading()); async getCombinedReport(groupId: string): Promise { if (!this.runCache.has(groupId)) { - this.pendingFetches.update((current) => current + 1); + this.pendingFetches.update(current => current + 1); try { const response = await fetch(`/api/reports/${groupId}`); @@ -68,12 +58,12 @@ export class ReportsFetcher { id: firstRun.id, group: firstRun.group, details: firstRun.details, - results: allRuns.flatMap((run) => run.results), + results: allRuns.flatMap(run => run.results), } satisfies RunInfo; this.runCache.set(groupId, combined); } finally { - this.pendingFetches.update((current) => current - 1); + this.pendingFetches.update(current => current - 1); } } diff --git a/report-app/src/app/shared/code-viewer.ts b/report-app/src/app/shared/code-viewer.ts index 09f041b..9c3e8c7 100644 --- a/report-app/src/app/shared/code-viewer.ts +++ b/report-app/src/app/shared/code-viewer.ts @@ -7,9 +7,9 @@ import { signal, ViewEncapsulation, } from '@angular/core'; -import { SafeHtml } from '@angular/platform-browser'; -import { CodeHighligher } from '../services/code-highligher'; -import { AppColorMode } from '../services/app-color-mode'; +import {SafeHtml} from '@angular/platform-browser'; +import {CodeHighligher} from '../services/code-highligher'; +import {AppColorMode} from '../services/app-color-mode'; @Component({ selector: 'app-code-viewer', diff --git a/report-app/src/app/shared/comparison/comparison-build-distribution.ts b/report-app/src/app/shared/comparison/comparison-build-distribution.ts index 0509789..9b4d3f2 100644 --- a/report-app/src/app/shared/comparison/comparison-build-distribution.ts +++ b/report-app/src/app/shared/comparison/comparison-build-distribution.ts @@ -6,14 +6,14 @@ import { input, PLATFORM_ID, } from '@angular/core'; -import { AggregatedRunStats } from '../../../../../runner/shared-interfaces'; +import {AggregatedRunStats} from '../../../../../runner/shared-interfaces'; import { ComparisonStackedBarChart, ComparisonStackedBarChartData, } from '../visualization/comparison-stacked-bar-chart'; -import { ModelComparisonData } from './comparison-data'; -import { getHardcodedColor, ScoreCssVariable } from '../scoring'; -import { AppColorMode } from '../../services/app-color-mode'; +import {ModelComparisonData} from './comparison-data'; +import {getHardcodedColor, ScoreCssVariable} from '../scoring'; +import {AppColorMode} from '../../services/app-color-mode'; @Component({ selector: 'comparison-build-distribution', @@ -36,46 +36,28 @@ export class ComparisonBuildDistribution { seriesColumns: [ { name: 'Successful initial builds', - color: getHardcodedColor( - this.platformId, - ScoreCssVariable.excellent, - colorMode - ), + color: getHardcodedColor(this.platformId, ScoreCssVariable.excellent, colorMode), }, { name: 'Successful builds after repair', - color: getHardcodedColor( - this.platformId, - ScoreCssVariable.great, - colorMode - ), + color: getHardcodedColor(this.platformId, ScoreCssVariable.great, colorMode), }, { name: 'Failed builds', - color: getHardcodedColor( - this.platformId, - ScoreCssVariable.poor, - colorMode - ), + color: getHardcodedColor(this.platformId, ScoreCssVariable.poor, colorMode), }, ], series: [], }; - const addSeriesData = ( - type: string, - stats: AggregatedRunStats, - appsCount: number - ) => { + const addSeriesData = (type: string, stats: AggregatedRunStats, appsCount: number) => { const successfulInitialBuilds = parseFloat( - (stats.builds.successfulInitialBuilds / appsCount).toFixed(3) + (stats.builds.successfulInitialBuilds / appsCount).toFixed(3), ); const successfulRepairedBuilds = parseFloat( - (stats.builds.successfulBuildsAfterRepair / appsCount).toFixed(3) - ); - const failedBuilds = parseFloat( - (stats.builds.failedBuilds / appsCount).toFixed(3) + (stats.builds.successfulBuildsAfterRepair / appsCount).toFixed(3), ); + const failedBuilds = parseFloat((stats.builds.failedBuilds / appsCount).toFixed(3)); result.series.push({ name: type, @@ -96,8 +78,7 @@ export class ComparisonBuildDistribution { }, { value: failedBuilds, - label: - failedBuilds > 0.05 ? `${(failedBuilds * 100).toFixed(1)}%` : '', + label: failedBuilds > 0.05 ? `${(failedBuilds * 100).toFixed(1)}%` : '', }, ], }); @@ -113,12 +94,11 @@ export class ComparisonBuildDistribution { readonly percentagesForTextOverview = computed(() => { const data = this.data(); - return data.series.map((s) => { + return data.series.map(s => { return { name: s.name, successfulBuilds: ( - ((s.stats.builds.successfulInitialBuilds + - s.stats.builds.successfulBuildsAfterRepair) / + ((s.stats.builds.successfulInitialBuilds + s.stats.builds.successfulBuildsAfterRepair) / s.appsCount) * 100 ).toFixed(1), diff --git a/report-app/src/app/shared/comparison/comparison-data.ts b/report-app/src/app/shared/comparison/comparison-data.ts index f37b952..316c670 100644 --- a/report-app/src/app/shared/comparison/comparison-data.ts +++ b/report-app/src/app/shared/comparison/comparison-data.ts @@ -1,4 +1,4 @@ -import { AggregatedRunStats } from '../../../../../runner/shared-interfaces'; +import {AggregatedRunStats} from '../../../../../runner/shared-interfaces'; export interface ModelComparisonData { series: Array<{ diff --git a/report-app/src/app/shared/comparison/comparison-runtime-distribution.ts b/report-app/src/app/shared/comparison/comparison-runtime-distribution.ts index d2c0752..013b84f 100644 --- a/report-app/src/app/shared/comparison/comparison-runtime-distribution.ts +++ b/report-app/src/app/shared/comparison/comparison-runtime-distribution.ts @@ -6,14 +6,14 @@ import { input, PLATFORM_ID, } from '@angular/core'; -import { AggregatedRunStats } from '../../../../../runner/shared-interfaces'; +import {AggregatedRunStats} from '../../../../../runner/shared-interfaces'; import { ComparisonStackedBarChart, ComparisonStackedBarChartData, } from '../visualization/comparison-stacked-bar-chart'; -import { ModelComparisonData } from './comparison-data'; -import { getHardcodedColor, ScoreCssVariable } from '../scoring'; -import { AppColorMode } from '../../services/app-color-mode'; +import {ModelComparisonData} from './comparison-data'; +import {getHardcodedColor, ScoreCssVariable} from '../scoring'; +import {AppColorMode} from '../../services/app-color-mode'; @Component({ selector: 'comparison-runtime-distribution', @@ -36,48 +36,32 @@ export class ComparisonRuntimeDistribution { seriesColumns: [ { name: 'No runtime errors', - color: getHardcodedColor( - this.platformId, - ScoreCssVariable.excellent, - colorMode - ), + color: getHardcodedColor(this.platformId, ScoreCssVariable.excellent, colorMode), }, { name: 'With runtime errors', - color: getHardcodedColor( - this.platformId, - ScoreCssVariable.poor, - colorMode - ), + color: getHardcodedColor(this.platformId, ScoreCssVariable.poor, colorMode), }, { name: 'Did not run', - color: getHardcodedColor( - this.platformId, - ScoreCssVariable.neutral, - colorMode - ), + color: getHardcodedColor(this.platformId, ScoreCssVariable.neutral, colorMode), }, ], series: [], }; - const addSeriesData = ( - type: string, - stats: AggregatedRunStats, - total: number - ) => { + const addSeriesData = (type: string, stats: AggregatedRunStats, total: number) => { // TODO: We should make `runtime` error collection required at this point. if (!stats.runtime) { console.error('No runtime stats for report in comparison. Skipping'); return; } - const { appsWithoutErrors, appsWithErrors } = stats.runtime; + const {appsWithoutErrors, appsWithErrors} = stats.runtime; const withoutErrors = parseFloat((appsWithoutErrors / total).toFixed(3)); const withErrors = parseFloat((appsWithErrors / total).toFixed(3)); const remainder = parseFloat( - ((total - (appsWithErrors + appsWithoutErrors)) / total).toFixed(3) + ((total - (appsWithErrors + appsWithoutErrors)) / total).toFixed(3), ); result.series.push({ @@ -85,10 +69,7 @@ export class ComparisonRuntimeDistribution { values: [ { value: withoutErrors, - label: - withoutErrors > 0.05 - ? `${(withoutErrors * 100).toFixed(1)}%` - : '', + label: withoutErrors > 0.05 ? `${(withoutErrors * 100).toFixed(1)}%` : '', }, { value: withErrors, diff --git a/report-app/src/app/shared/comparison/comparison-score-distribution.ts b/report-app/src/app/shared/comparison/comparison-score-distribution.ts index 80ec907..e67ec64 100644 --- a/report-app/src/app/shared/comparison/comparison-score-distribution.ts +++ b/report-app/src/app/shared/comparison/comparison-score-distribution.ts @@ -6,15 +6,15 @@ import { input, PLATFORM_ID, } from '@angular/core'; -import { isPositiveScore } from '../../../../../runner/ratings/stats'; -import { AggregatedRunStats } from '../../../../../runner/shared-interfaces'; +import {isPositiveScore} from '../../../../../runner/ratings/stats'; +import {AggregatedRunStats} from '../../../../../runner/shared-interfaces'; import { ComparisonStackedBarChart, ComparisonStackedBarChartData, } from '../visualization/comparison-stacked-bar-chart'; -import { ModelComparisonData } from './comparison-data'; -import { bucketToScoreVariable, getHardcodedColor } from '../scoring'; -import { AppColorMode } from '../../services/app-color-mode'; +import {ModelComparisonData} from './comparison-data'; +import {bucketToScoreVariable, getHardcodedColor} from '../scoring'; +import {AppColorMode} from '../../services/app-color-mode'; @Component({ selector: 'comparison-score-distribution', @@ -38,32 +38,22 @@ export class ComparisonScoreDistribution { series: [], }; - const addSeriesData = ( - type: string, - distribution: AggregatedRunStats, - appsCount: number - ) => { + const addSeriesData = (type: string, distribution: AggregatedRunStats, appsCount: number) => { // The buckets are the same for all results so populate // the columns when the first result is added. if (result.seriesColumns.length === 0) { for (const bucket of distribution.buckets) { result.seriesColumns.push({ name: `${bucket.name} score`, - color: getHardcodedColor( - this.platformId, - bucketToScoreVariable(bucket), - colorMode - ), + color: getHardcodedColor(this.platformId, bucketToScoreVariable(bucket), colorMode), }); } } result.series.push({ name: type, - values: distribution.buckets.map((bucket) => { - const percentage = parseFloat( - (bucket.appsCount / appsCount).toFixed(3) - ); + values: distribution.buckets.map(bucket => { + const percentage = parseFloat((bucket.appsCount / appsCount).toFixed(3)); return { value: percentage, label: percentage > 0.05 ? `${(percentage * 100).toFixed(1)}%` : '', @@ -82,10 +72,10 @@ export class ComparisonScoreDistribution { readonly percentagesForTextOverview = computed(() => { const data = this.data(); - return data.series.map((s) => { + return data.series.map(s => { const goodOrBetterCount = s.stats.buckets.reduce( (sum, bucket) => sum + (isPositiveScore(bucket) ? bucket.appsCount : 0), - 0 + 0, ); return { diff --git a/report-app/src/app/shared/debugging-zip.ts b/report-app/src/app/shared/debugging-zip.ts index ec2e5b0..ae3aad9 100644 --- a/report-app/src/app/shared/debugging-zip.ts +++ b/report-app/src/app/shared/debugging-zip.ts @@ -1,8 +1,5 @@ -import { BuildResultStatus } from '../../../../runner/workers/builder/builder-types'; -import { - AssessmentResult, - RunInfo, -} from '../../../../runner/shared-interfaces'; +import {BuildResultStatus} from '../../../../runner/workers/builder/builder-types'; +import {AssessmentResult, RunInfo} from '../../../../runner/shared-interfaces'; import JsZip from 'jszip'; /** @@ -12,16 +9,10 @@ import JsZip from 'jszip'; * @param app The assessment result for which to create the debugging zip. * @returns A promise that resolves with the generated ZIP file as a Blob. */ -export async function createPromptDebuggingZip( - run: RunInfo, - app: AssessmentResult -): Promise { +export async function createPromptDebuggingZip(run: RunInfo, app: AssessmentResult): Promise { const zip = new JsZip(); - zip.file( - 'prompt.md', - `${run.details.systemPromptGeneration}\n\n${app.promptDef.prompt}` - ); + zip.file('prompt.md', `${run.details.systemPromptGeneration}\n\n${app.promptDef.prompt}`); let generatedFiles = ``; for (const file of app.outputFiles) { diff --git a/report-app/src/app/shared/expansion-panel/expansion-panel-header.ts b/report-app/src/app/shared/expansion-panel/expansion-panel-header.ts index 45e1e51..5cb4fad 100644 --- a/report-app/src/app/shared/expansion-panel/expansion-panel-header.ts +++ b/report-app/src/app/shared/expansion-panel/expansion-panel-header.ts @@ -1,4 +1,4 @@ -import { Component, ViewEncapsulation } from '@angular/core'; +import {Component, ViewEncapsulation} from '@angular/core'; @Component({ selector: 'expansion-panel-header', diff --git a/report-app/src/app/shared/expansion-panel/expansion-panel.ts b/report-app/src/app/shared/expansion-panel/expansion-panel.ts index bbe3143..6ce23be 100644 --- a/report-app/src/app/shared/expansion-panel/expansion-panel.ts +++ b/report-app/src/app/shared/expansion-panel/expansion-panel.ts @@ -1,4 +1,4 @@ -import { Component, input, model } from '@angular/core'; +import {Component, input, model} from '@angular/core'; @Component({ selector: 'expansion-panel', diff --git a/report-app/src/app/shared/message-spinner.ts b/report-app/src/app/shared/message-spinner.ts index ad79513..72a3d99 100644 --- a/report-app/src/app/shared/message-spinner.ts +++ b/report-app/src/app/shared/message-spinner.ts @@ -1,5 +1,5 @@ -import { Component, input } from '@angular/core'; -import { Spinner } from './spinner/spinner'; +import {Component, input} from '@angular/core'; +import {Spinner} from './spinner/spinner'; @Component({ selector: 'message-spinner', diff --git a/report-app/src/app/shared/provider-label.ts b/report-app/src/app/shared/provider-label.ts index 62b84c6..b2e9859 100644 --- a/report-app/src/app/shared/provider-label.ts +++ b/report-app/src/app/shared/provider-label.ts @@ -1,4 +1,4 @@ -import { Component, computed, input } from '@angular/core'; +import {Component, computed, input} from '@angular/core'; const exactMatches: Record = { angular: 'frameworks/angular.png', @@ -51,9 +51,7 @@ export class ProviderLabel { return null; } - return exactMatches.hasOwnProperty(id) - ? exactMatches[id] - : getModelLogoURL(id); + return exactMatches.hasOwnProperty(id) ? exactMatches[id] : getModelLogoURL(id); }); } diff --git a/report-app/src/app/shared/report-select/report-select.ts b/report-app/src/app/shared/report-select/report-select.ts index df1a9e8..3ed0208 100644 --- a/report-app/src/app/shared/report-select/report-select.ts +++ b/report-app/src/app/shared/report-select/report-select.ts @@ -1,6 +1,6 @@ -import { Component, computed, input, model } from '@angular/core'; -import { RunGroup } from '../../../../../runner/shared-interfaces'; -import { DatePipe } from '@angular/common'; +import {Component, computed, input, model} from '@angular/core'; +import {RunGroup} from '../../../../../runner/shared-interfaces'; +import {DatePipe} from '@angular/common'; @Component({ selector: 'report-select', @@ -22,14 +22,14 @@ export class ReportSelect { acc[dateGroup].push(group); return acc; }, - {} as { [key: string]: RunGroup[] } + {} as {[key: string]: RunGroup[]}, ); const sortedDateGroups = Object.keys(grouped).sort( - (a, b) => new Date(b).getTime() - new Date(a).getTime() + (a, b) => new Date(b).getTime() - new Date(a).getTime(), ); - return sortedDateGroups.map((dateGroup) => { + return sortedDateGroups.map(dateGroup => { const options = grouped[dateGroup]; options.sort((a, b) => { const timeA = new Date(a.timestamp).getTime(); @@ -44,7 +44,7 @@ export class ReportSelect { }); readonly selectedReport = computed(() => { - return this.reports().find((r) => r.id === this.selection()); + return this.reports().find(r => r.id === this.selection()); }); onSelect(event: Event) { diff --git a/report-app/src/app/shared/score/score.ts b/report-app/src/app/shared/score/score.ts index 078ca13..e2b6bfe 100644 --- a/report-app/src/app/shared/score/score.ts +++ b/report-app/src/app/shared/score/score.ts @@ -1,5 +1,5 @@ -import { Component, computed, input } from '@angular/core'; -import { formatScore } from '../scoring'; +import {Component, computed, input} from '@angular/core'; +import {formatScore} from '../scoring'; @Component({ selector: 'score', @@ -20,9 +20,7 @@ export class Score { readonly size = input<'small' | 'medium' | 'large'>('medium'); readonly label = input(''); - protected formattedScore = computed(() => - formatScore(this.total(), this.max()) - ); + protected formattedScore = computed(() => formatScore(this.total(), this.max())); protected scoreClass = computed(() => { const percentage = this.formattedScore(); diff --git a/report-app/src/app/shared/scoring.ts b/report-app/src/app/shared/scoring.ts index 9523da5..e87a73d 100644 --- a/report-app/src/app/shared/scoring.ts +++ b/report-app/src/app/shared/scoring.ts @@ -1,6 +1,6 @@ -import { isPlatformServer } from '@angular/common'; -import { ScoreBucket } from '../../../../runner/shared-interfaces'; -import { ColorMode } from '../services/app-color-mode'; +import {isPlatformServer} from '@angular/common'; +import {ScoreBucket} from '../../../../runner/shared-interfaces'; +import {ColorMode} from '../services/app-color-mode'; export enum ScoreCssVariable { excellent = 'var(--status-fill-excellent)', @@ -32,7 +32,7 @@ export function bucketToScoreVariable(bucket: ScoreBucket): ScoreCssVariable { export function getHardcodedColor( platformId: Object, color: `var(${string})`, - colorMode: ColorMode + colorMode: ColorMode, ): string { const varName = getValueInParens(color); @@ -42,16 +42,12 @@ export function getHardcodedColor( } if (!CACHED_COLORS[colorMode][varName]) { - const computed = window - .getComputedStyle(document.body) - .getPropertyValue(varName); + const computed = window.getComputedStyle(document.body).getPropertyValue(varName); let value: string; if (computed.startsWith('light-dark')) { const inner = getValueInParens(computed) || 'transparent, transparent'; - value = inner.split(',').map((part) => part.trim())[ - colorMode === 'light' ? 0 : 1 - ]; + value = inner.split(',').map(part => part.trim())[colorMode === 'light' ? 0 : 1]; } else { value = computed; } diff --git a/report-app/src/app/shared/spinner/spinner.ts b/report-app/src/app/shared/spinner/spinner.ts index 83c6a12..d7f9431 100644 --- a/report-app/src/app/shared/spinner/spinner.ts +++ b/report-app/src/app/shared/spinner/spinner.ts @@ -1,4 +1,4 @@ -import { Component } from '@angular/core'; +import {Component} from '@angular/core'; @Component({ selector: 'spinner', diff --git a/report-app/src/app/shared/visualization/comparison-stacked-bar-chart.ts b/report-app/src/app/shared/visualization/comparison-stacked-bar-chart.ts index 992fa54..7f397c6 100644 --- a/report-app/src/app/shared/visualization/comparison-stacked-bar-chart.ts +++ b/report-app/src/app/shared/visualization/comparison-stacked-bar-chart.ts @@ -7,13 +7,13 @@ import { input, viewChild, } from '@angular/core'; -import { GoogleChartsLoader } from '../../services/google-charts-loader'; -import { AppResizeNotifier } from '../../services/app-resize-notifier'; -import { AppColorMode } from '../../services/app-color-mode'; +import {GoogleChartsLoader} from '../../services/google-charts-loader'; +import {AppResizeNotifier} from '../../services/app-resize-notifier'; +import {AppColorMode} from '../../services/app-color-mode'; export interface ComparisonStackedBarChartData { title: string; - seriesColumns: Array<{ name: string; color: string }>; + seriesColumns: Array<{name: string; color: string}>; series: Array<{ name: string; values: Array<{ @@ -52,7 +52,7 @@ export class ComparisonStackedBarChart { for (const column of data.seriesColumns) { table.addColumn('number', column.name); - table.addColumn({ role: 'annotation' }); + table.addColumn({role: 'annotation'}); } for (const s of data.series) { @@ -68,24 +68,22 @@ export class ComparisonStackedBarChart { // The chart library seems to ignore CSS variable colors so we need to hardcode them. const textColor = colorMode === 'dark' ? '#f9fafb' : '#1e293b'; - const chart = new google.visualization.BarChart( - this.chartEl().nativeElement - ); + const chart = new google.visualization.BarChart(this.chartEl().nativeElement); chart.draw(table, { title: data.title, - titleTextStyle: { color: textColor }, + titleTextStyle: {color: textColor}, backgroundColor: 'transparent', hAxis: { minTextSpacing: 20, - textStyle: { fontSize: 10, color: textColor }, + textStyle: {fontSize: 10, color: textColor}, format: 'percent', }, - legend: { textStyle: { color: textColor } }, + legend: {textStyle: {color: textColor}}, isStacked: 'percent', series: data.seriesColumns.reduce( - (res, s, index) => ({ ...res, [index]: { color: s.color } }), - {} + (res, s, index) => ({...res, [index]: {color: s.color}}), + {}, ), chartArea: { left: 250, @@ -108,7 +106,7 @@ export class ComparisonStackedBarChart { minTextSpacing: 20, viewWindowMode: 'maximized', showTextEvery: 1, - textStyle: { color: textColor }, + textStyle: {color: textColor}, }, // TODO: Consider enabling trendlines. // trendlines: { 0: {}, 1: {} }, diff --git a/report-app/src/app/shared/visualization/score-visualization.ts b/report-app/src/app/shared/visualization/score-visualization.ts index 52eb1d3..9327fe2 100644 --- a/report-app/src/app/shared/visualization/score-visualization.ts +++ b/report-app/src/app/shared/visualization/score-visualization.ts @@ -1,14 +1,7 @@ -import { - afterRenderEffect, - Component, - ElementRef, - inject, - input, - viewChild, -} from '@angular/core'; -import { RunGroup } from '../../../../../runner/shared-interfaces'; -import { GoogleChartsLoader } from '../../services/google-charts-loader'; -import { AppResizeNotifier } from '../../services/app-resize-notifier'; +import {afterRenderEffect, Component, ElementRef, inject, input, viewChild} from '@angular/core'; +import {RunGroup} from '../../../../../runner/shared-interfaces'; +import {GoogleChartsLoader} from '../../services/google-charts-loader'; +import {AppResizeNotifier} from '../../services/app-resize-notifier'; @Component({ selector: 'score-visualization', @@ -50,21 +43,20 @@ export class ScoreVisualization { timestamp: dayDate, }; dataRows[dayKey].buildQualityPercentages.push( - (buildStats.successfulBuildsAfterRepair + - buildStats.successfulInitialBuilds) / - group.appsCount + (buildStats.successfulBuildsAfterRepair + buildStats.successfulInitialBuilds) / + group.appsCount, ); dataRows[dayKey].overallQualityPercentages.push(overallQuality); appsCount += group.appsCount; } - return { dataRows, averageAppsCount: appsCount / this.groups().length }; + return {dataRows, averageAppsCount: appsCount / this.groups().length}; } private async _renderChart() { // Note: we need to call `_processData` synchronously // so the wrapping effect picks up the data dependency. - const { dataRows, averageAppsCount } = this._processData(); + const {dataRows, averageAppsCount} = this._processData(); await this.googleChartsLoader.ready; @@ -75,19 +67,16 @@ export class ScoreVisualization { table.addColumn('number', 'Overall Quality'); table.addRows( - Object.values(dataRows).map((r) => [ + Object.values(dataRows).map(r => [ r.timestamp, // TODO: Consider incorporating build quality scores. // r.buildQualityPercentages.reduce((a, b) => a + b) / // r.buildQualityPercentages.length, - r.overallQualityPercentages.reduce((a, b) => a + b) / - r.overallQualityPercentages.length, - ]) + r.overallQualityPercentages.reduce((a, b) => a + b) / r.overallQualityPercentages.length, + ]), ); - const chart = new google.visualization.LineChart( - this.chartContainer().nativeElement - ); + const chart = new google.visualization.LineChart(this.chartContainer().nativeElement); chart.draw(table, { curveType: 'function', title: `Score average over time (~${averageAppsCount.toFixed(0)} apps generated per day)`, @@ -96,7 +85,7 @@ export class ScoreVisualization { }, hAxis: { minTextSpacing: 20, - textStyle: { fontSize: 10 }, + textStyle: {fontSize: 10}, }, chartArea: { left: 50, diff --git a/report-app/src/app/shared/visualization/stacked-bar-chart/stacked-bar-chart.ts b/report-app/src/app/shared/visualization/stacked-bar-chart/stacked-bar-chart.ts index aa88d0f..3d20c98 100644 --- a/report-app/src/app/shared/visualization/stacked-bar-chart/stacked-bar-chart.ts +++ b/report-app/src/app/shared/visualization/stacked-bar-chart/stacked-bar-chart.ts @@ -1,4 +1,4 @@ -import { Component, computed, input, signal } from '@angular/core'; +import {Component, computed, input, signal} from '@angular/core'; export type StackedBarChartData = Array<{ label: string; @@ -16,9 +16,7 @@ export class StackedBarChart { compact = input(false); showLegend = input(true); - total = computed(() => - this.data().reduce((acc, item) => acc + item.value, 0) - ); + total = computed(() => this.data().reduce((acc, item) => acc + item.value, 0)); protected displayPercentage = signal(false); @@ -29,13 +27,11 @@ export class StackedBarChart { } toggleDisplayMode(): void { - this.displayPercentage.update((current) => !current); + this.displayPercentage.update(current => !current); } getItemDisplayValue(item: StackedBarChartData[0]): string { if (item.value === 0) return ''; - return this.displayPercentage() - ? `${this.asPercent(item.value)}%` - : `${item.value}`; + return this.displayPercentage() ? `${this.asPercent(item.value)}%` : `${item.value}`; } } diff --git a/report-app/src/main.server.ts b/report-app/src/main.server.ts index 6930b5c..5b24e7e 100644 --- a/report-app/src/main.server.ts +++ b/report-app/src/main.server.ts @@ -1,6 +1,6 @@ -import { bootstrapApplication } from '@angular/platform-browser'; -import { App } from './app/app'; -import { serverConfig } from './app/app.config.server'; +import {bootstrapApplication} from '@angular/platform-browser'; +import {App} from './app/app'; +import {serverConfig} from './app/app.config.server'; const bootstrap = () => bootstrapApplication(App, serverConfig); diff --git a/report-app/src/main.ts b/report-app/src/main.ts index 190f341..1732e54 100644 --- a/report-app/src/main.ts +++ b/report-app/src/main.ts @@ -1,5 +1,5 @@ -import { bootstrapApplication } from '@angular/platform-browser'; -import { appConfig } from './app/app.config'; -import { App } from './app/app'; +import {bootstrapApplication} from '@angular/platform-browser'; +import {appConfig} from './app/app.config'; +import {App} from './app/app'; -bootstrapApplication(App, appConfig).catch((err) => console.error(err)); +bootstrapApplication(App, appConfig).catch(err => console.error(err)); diff --git a/runner/bin/cli.ts b/runner/bin/cli.ts index fae080f..0b26861 100644 --- a/runner/bin/cli.ts +++ b/runner/bin/cli.ts @@ -1,11 +1,11 @@ #!/usr/bin/env node import yargs from 'yargs'; -import { hideBin } from 'yargs/helpers'; -import { EvalModule } from '../eval-cli.js'; -import { ReportModule } from '../report-cli.js'; -import { InitModule } from '../init-cli.js'; -import { RunModule } from '../run-cli.js'; +import {hideBin} from 'yargs/helpers'; +import {EvalModule} from '../eval-cli.js'; +import {ReportModule} from '../report-cli.js'; +import {InitModule} from '../init-cli.js'; +import {RunModule} from '../run-cli.js'; yargs() .scriptName('web-codegen-scorer') diff --git a/runner/codegen/gemini-cli/directory-snapshot.ts b/runner/codegen/gemini-cli/directory-snapshot.ts index eb20049..66cefe9 100644 --- a/runner/codegen/gemini-cli/directory-snapshot.ts +++ b/runner/codegen/gemini-cli/directory-snapshot.ts @@ -1,17 +1,17 @@ -import { glob } from 'tinyglobby'; -import { readFile } from 'fs/promises'; -import { fileTypeFromBuffer } from 'file-type'; +import {glob} from 'tinyglobby'; +import {readFile} from 'fs/promises'; +import {fileTypeFromBuffer} from 'file-type'; /** Represents a snapshot of a directory at a certain point in time. */ export class DirectorySnapshot { private constructor( readonly files: ReadonlyMap, - readonly directory: string + readonly directory: string, ) {} static async forDirectory( directory: string, - ignoredPatterns: string[] + ignoredPatterns: string[], ): Promise { const paths = await glob('**/*', { cwd: directory, @@ -22,7 +22,7 @@ export class DirectorySnapshot { const files = new Map(); await Promise.all( - paths.map(async (path) => { + paths.map(async path => { const buffer = await readFile(path); const binaryType = await fileTypeFromBuffer(buffer); @@ -30,7 +30,7 @@ export class DirectorySnapshot { if (!binaryType) { files.set(path, buffer.toString()); } - }) + }), ); return new DirectorySnapshot(files, directory); diff --git a/runner/codegen/gemini-cli/gemini-cli-runner.ts b/runner/codegen/gemini-cli/gemini-cli-runner.ts index 1be4794..abe4dbb 100644 --- a/runner/codegen/gemini-cli/gemini-cli-runner.ts +++ b/runner/codegen/gemini-cli/gemini-cli-runner.ts @@ -1,4 +1,4 @@ -import { ChildProcess, spawn } from 'child_process'; +import {ChildProcess, spawn} from 'child_process'; import { LlmConstrainedOutputGenerateResponse, LlmGenerateFilesContext, @@ -7,24 +7,20 @@ import { LlmGenerateTextResponse, LlmRunner, } from '../llm-runner.js'; -import { join, relative } from 'path'; -import { existsSync, mkdirSync } from 'fs'; -import { writeFile } from 'fs/promises'; +import {join, relative} from 'path'; +import {existsSync, mkdirSync} from 'fs'; +import {writeFile} from 'fs/promises'; import { getGeminiIgnoreFile, getGeminiInstructionsFile, getGeminiSettingsFile, } from './gemini-files.js'; -import { DirectorySnapshot } from './directory-snapshot.js'; -import { LlmResponseFile } from '../../shared-interfaces.js'; -import { UserFacingError } from '../../utils/errors.js'; +import {DirectorySnapshot} from './directory-snapshot.js'; +import {LlmResponseFile} from '../../shared-interfaces.js'; +import {UserFacingError} from '../../utils/errors.js'; import assert from 'assert'; -const SUPPORTED_MODELS = [ - 'gemini-2.5-pro', - 'gemini-2.5-flash', - 'gemini-2.5-flash-lite', -]; +const SUPPORTED_MODELS = ['gemini-2.5-pro', 'gemini-2.5-flash', 'gemini-2.5-flash-lite']; /** Runner that generates code using the Gemini CLI. */ export class GeminiCliRunner implements LlmRunner { @@ -42,21 +38,19 @@ export class GeminiCliRunner implements LlmRunner { '**/.geminiignore', ]; - async generateFiles( - options: LlmGenerateFilesRequestOptions - ): Promise { - const { context, model } = options; + async generateFiles(options: LlmGenerateFilesRequestOptions): Promise { + const {context, model} = options; // TODO: Consider removing these assertions when we have better types here. // These fields are always set when running in a local environment, and this // is a requirement for selecting the `gemini-cli` runner. assert( context.buildCommand, - 'Expected a `buildCommand` to be set in the LLM generate request context' + 'Expected a `buildCommand` to be set in the LLM generate request context', ); assert( context.packageManager, - 'Expected a `packageManager` to be set in the LLM generate request context' + 'Expected a `packageManager` to be set in the LLM generate request context', ); const ignoreFilePath = join(context.directory, '.geminiignore'); @@ -64,7 +58,7 @@ export class GeminiCliRunner implements LlmRunner { const settingsDir = join(context.directory, '.gemini'); const initialSnapshot = await DirectorySnapshot.forDirectory( context.directory, - this.evalIgnoredPatterns + this.evalIgnoredPatterns, ); mkdirSync(settingsDir); @@ -73,24 +67,18 @@ export class GeminiCliRunner implements LlmRunner { writeFile(ignoreFilePath, getGeminiIgnoreFile()), writeFile( instructionFilePath, - getGeminiInstructionsFile( - context.systemInstructions, - context.buildCommand - ) + getGeminiInstructionsFile(context.systemInstructions, context.buildCommand), ), writeFile( join(settingsDir, 'settings.json'), - getGeminiSettingsFile( - context.packageManager, - context.possiblePackageManagers - ) + getGeminiSettingsFile(context.packageManager, context.possiblePackageManagers), ), ]); const reasoning = await this.runGeminiProcess(model, context, 2, 10); const finalSnapshot = await DirectorySnapshot.forDirectory( context.directory, - this.evalIgnoredPatterns + this.evalIgnoredPatterns, ); const diff = finalSnapshot.getChangedOrAddedFiles(initialSnapshot); @@ -103,22 +91,18 @@ export class GeminiCliRunner implements LlmRunner { }); } - return { files, reasoning, toolLogs: [] }; + return {files, reasoning, toolLogs: []}; } generateText(): Promise { // Technically we can make this work, but we don't need it at the time of writing. - throw new UserFacingError( - 'Generating text with Gemini CLI is not supported.' - ); + throw new UserFacingError('Generating text with Gemini CLI is not supported.'); } generateConstrained(): Promise> { // We can't support this, because there's no straightforward // way to tell the Gemini CLI to follow a schema. - throw new UserFacingError( - 'Constrained output with Gemini CLI is not supported.' - ); + throw new UserFacingError('Constrained output with Gemini CLI is not supported.'); } getSupportedModels(): string[] { @@ -163,14 +147,10 @@ export class GeminiCliRunner implements LlmRunner { } } - const binaryPath = closestRoot - ? join(closestRoot, 'node_modules/.bin/gemini') - : null; + const binaryPath = closestRoot ? join(closestRoot, 'node_modules/.bin/gemini') : null; if (!binaryPath || !existsSync(binaryPath)) { - throw new UserFacingError( - 'Gemini CLI is not installed inside the current project' - ); + throw new UserFacingError('Gemini CLI is not installed inside the current project'); } return binaryPath; @@ -180,9 +160,9 @@ export class GeminiCliRunner implements LlmRunner { model: string, context: LlmGenerateFilesContext, inactivityTimeoutMins: number, - totalRequestTimeoutMins: number + totalRequestTimeoutMins: number, ): Promise { - return new Promise((resolve) => { + return new Promise(resolve => { let stdoutBuffer = ''; let stdErrBuffer = ''; let isDone = false; @@ -204,8 +184,7 @@ export class GeminiCliRunner implements LlmRunner { this.pendingTimeouts.delete(globalTimeout); this.pendingProcesses.delete(childProcess); - const separator = - '\n--------------------------------------------------\n'; + const separator = '\n--------------------------------------------------\n'; if (stdErrBuffer.length > 0) { stdoutBuffer += separator + 'Stderr output:\n' + stdErrBuffer; @@ -218,23 +197,20 @@ export class GeminiCliRunner implements LlmRunner { const noOutputCallback = () => { finalize( `There was no output from Gemini CLI for ${inactivityTimeoutMins} minute(s). ` + - `Stopping the process...` + `Stopping the process...`, ); }; // Gemini can get into a state where it stops outputting code, but it also doesn't exit // the process. Stop if there hasn't been any output for a certain amount of time. - let inactivityTimeout = setTimeout( - noOutputCallback, - inactivityTimeoutMins * msPerMin - ); + let inactivityTimeout = setTimeout(noOutputCallback, inactivityTimeoutMins * msPerMin); this.pendingTimeouts.add(inactivityTimeout); // Also add a timeout for the entire codegen process. const globalTimeout = setTimeout(() => { finalize( `Gemini CLI didn't finish within ${totalRequestTimeoutMins} minute(s). ` + - `Stopping the process...` + `Stopping the process...`, ); }, totalRequestTimeoutMins * msPerMin); @@ -251,30 +227,24 @@ export class GeminiCliRunner implements LlmRunner { ], { cwd: context.directory, - env: { ...process.env }, - } + env: {...process.env}, + }, ); - childProcess.on('close', (code) => - finalize( - 'Gemini CLI process has exited' + - (code == null ? '.' : ` with ${code} code.`) - ) + childProcess.on('close', code => + finalize('Gemini CLI process has exited' + (code == null ? '.' : ` with ${code} code.`)), ); - childProcess.stdout.on('data', (data) => { + childProcess.stdout.on('data', data => { if (inactivityTimeout) { this.pendingTimeouts.delete(inactivityTimeout); clearTimeout(inactivityTimeout); } stdoutBuffer += data.toString(); - inactivityTimeout = setTimeout( - noOutputCallback, - inactivityTimeoutMins * msPerMin - ); + inactivityTimeout = setTimeout(noOutputCallback, inactivityTimeoutMins * msPerMin); this.pendingTimeouts.add(inactivityTimeout); }); - childProcess.stderr.on('data', (data) => { + childProcess.stderr.on('data', data => { stdErrBuffer += data.toString(); }); }); diff --git a/runner/codegen/gemini-cli/gemini-files.ts b/runner/codegen/gemini-cli/gemini-files.ts index 762ffba..9412cd7 100644 --- a/runner/codegen/gemini-cli/gemini-files.ts +++ b/runner/codegen/gemini-cli/gemini-files.ts @@ -1,7 +1,7 @@ /** Generates the `GEMINI.md` file for an eval run. */ export function getGeminiInstructionsFile( systemInstructions: string, - buildCommand: string + buildCommand: string, ): string { return [ `# Important Rules`, @@ -48,7 +48,7 @@ export function getGeminiIgnoreFile(): string { /** Gets the content of the `.gemini/settings.json` file. */ export function getGeminiSettingsFile( packageManager: string, - possiblePackageManagers: string[] + possiblePackageManagers: string[], ): string { const config = { excludeTools: [ @@ -56,8 +56,8 @@ export function getGeminiSettingsFile( // managers since doing so via prompting doesn't always work. 'run_shell_command(git)', ...possiblePackageManagers - .filter((m) => m !== packageManager) - .map((m) => `run_shell_command(${m})`), + .filter(m => m !== packageManager) + .map(m => `run_shell_command(${m})`), // Note that we don't block all commands, // because the build commands also go through it. diff --git a/runner/codegen/genkit/genkit-logger.ts b/runner/codegen/genkit/genkit-logger.ts index 7ed8322..e4e1507 100644 --- a/runner/codegen/genkit/genkit-logger.ts +++ b/runner/codegen/genkit/genkit-logger.ts @@ -1,4 +1,4 @@ -import { logger } from 'genkit/logging'; +import {logger} from 'genkit/logging'; const defaultLogger = logger.defaultLogger; diff --git a/runner/codegen/genkit/genkit-runner.ts b/runner/codegen/genkit/genkit-runner.ts index 7ad9cda..b4e1887 100644 --- a/runner/codegen/genkit/genkit-runner.ts +++ b/runner/codegen/genkit/genkit-runner.ts @@ -1,13 +1,7 @@ -import { - DynamicResourceAction, - GenerateResponse, - genkit, - ModelReference, - ToolAction, -} from 'genkit'; -import { GenkitMcpHost, McpServerConfig, createMcpHost } from '@genkit-ai/mcp'; -import { GenkitPlugin, GenkitPluginV2 } from 'genkit/plugin'; -import { z } from 'zod'; +import {DynamicResourceAction, GenerateResponse, genkit, ModelReference, ToolAction} from 'genkit'; +import {GenkitMcpHost, McpServerConfig, createMcpHost} from '@genkit-ai/mcp'; +import {GenkitPlugin, GenkitPluginV2} from 'genkit/plugin'; +import {z} from 'zod'; import { McpServerOptions, LlmConstrainedOutputGenerateRequestOptions, @@ -18,17 +12,14 @@ import { LlmGenerateTextRequestOptions, LlmGenerateFilesRequestOptions, } from '../llm-runner.js'; -import { setTimeout } from 'node:timers/promises'; -import { callWithTimeout } from '../../utils/timeout.js'; -import { logger } from 'genkit/logging'; -import { GenkitLogger } from './genkit-logger.js'; -import { MODEL_PROVIDERS } from './models.js'; -import { UserFacingError } from '../../utils/errors.js'; -import { - GenkitModelProvider, - PromptDataForCounting, -} from './model-provider.js'; -import { ToolLogEntry } from '../../shared-interfaces.js'; +import {setTimeout} from 'node:timers/promises'; +import {callWithTimeout} from '../../utils/timeout.js'; +import {logger} from 'genkit/logging'; +import {GenkitLogger} from './genkit-logger.js'; +import {MODEL_PROVIDERS} from './models.js'; +import {UserFacingError} from '../../utils/errors.js'; +import {GenkitModelProvider, PromptDataForCounting} from './model-provider.js'; +import {ToolLogEntry} from '../../shared-interfaces.js'; const globalLogger = new GenkitLogger(); logger.init(globalLogger); @@ -43,9 +34,9 @@ export class GenkitRunner implements LlmRunner { private toolLogs: ToolLogEntry[] = []; async generateConstrained( - options: LlmConstrainedOutputGenerateRequestOptions + options: LlmConstrainedOutputGenerateRequestOptions, ): Promise> { - const { provider, model } = this.resolveModel(options.model); + const {provider, model} = this.resolveModel(options.model); const result = await this._genkitRequest(provider, model, options); return { @@ -55,25 +46,21 @@ export class GenkitRunner implements LlmRunner { }; } - async generateFiles( - options: LlmGenerateFilesRequestOptions - ): Promise { + async generateFiles(options: LlmGenerateFilesRequestOptions): Promise { const requestOptions: LlmConstrainedOutputGenerateRequestOptions = { ...options, prompt: options.context.combinedPrompt, schema: z.object({ outputFiles: z.array( z.object({ - filePath: z - .string() - .describe('Name of the file that is being changed'), + filePath: z.string().describe('Name of the file that is being changed'), code: z.string().describe('New code of the file'), - }) + }), ), }), }; - const { provider, model } = this.resolveModel(options.model); + const {provider, model} = this.resolveModel(options.model); const result = await this._genkitRequest(provider, model, requestOptions); const files = result.output.outputFiles || []; @@ -93,10 +80,8 @@ export class GenkitRunner implements LlmRunner { return this.toolLogs.splice(0); } - async generateText( - options: LlmGenerateTextRequestOptions - ): Promise { - const { provider, model } = this.resolveModel(options.model); + async generateText(options: LlmGenerateTextRequestOptions): Promise { + const {provider, model} = this.resolveModel(options.model); const result = await this._genkitRequest(provider, model, options); return { @@ -108,24 +93,20 @@ export class GenkitRunner implements LlmRunner { } getSupportedModels(): string[] { - return MODEL_PROVIDERS.flatMap((p) => p.getSupportedModels()); + return MODEL_PROVIDERS.flatMap(p => p.getSupportedModels()); } private async _genkitRequest( provider: GenkitModelProvider, model: ModelReference, - options: - | LlmGenerateTextRequestOptions - | LlmConstrainedOutputGenerateRequestOptions + options: LlmGenerateTextRequestOptions | LlmConstrainedOutputGenerateRequestOptions, ) { return await rateLimitLLMRequest( provider, model, - { messages: options.messages || [], prompt: options.prompt }, + {messages: options.messages || [], prompt: options.prompt}, () => { - const schema = ( - options as Partial - ).schema; + const schema = (options as Partial).schema; const performRequest = async () => { let tools: ToolAction[] | undefined; let resources: DynamicResourceAction[] | undefined; @@ -152,10 +133,9 @@ export class GenkitRunner implements LlmRunner { : undefined, config: provider.getModelSpecificConfig( { - includeThoughts: - options.thinkingConfig?.includeThoughts ?? false, + includeThoughts: options.thinkingConfig?.includeThoughts ?? false, }, - options.model + options.model, ), messages: options.messages, tools, @@ -172,10 +152,10 @@ export class GenkitRunner implements LlmRunner { ? callWithTimeout( options.timeout.description, performRequest, - options.timeout.durationInMins + options.timeout.durationInMins, ) : performRequest(); - } + }, ); } @@ -190,15 +170,9 @@ export class GenkitRunner implements LlmRunner { } for (const contentPart of message.content) { if (contentPart.toolRequest) { - toolRequests.set( - contentPart.toolRequest.ref || '0', - contentPart.toolRequest - ); + toolRequests.set(contentPart.toolRequest.ref || '0', contentPart.toolRequest); } else if (contentPart.toolResponse) { - toolResponses.set( - contentPart.toolResponse.ref || '0', - contentPart.toolResponse - ); + toolResponses.set(contentPart.toolResponse.ref || '0', contentPart.toolResponse); } } } @@ -222,24 +196,22 @@ export class GenkitRunner implements LlmRunner { const mcpServers = servers.reduce( (result, current) => { - const { name, ...config } = current; + const {name, ...config} = current; result[name] = config; return result; }, - {} as Record + {} as Record, ); globalLogger.startCapturingLogs(); - this.mcpHost = createMcpHost({ name: hostName, mcpServers }); + this.mcpHost = createMcpHost({name: hostName, mcpServers}); } flushMcpServerLogs(): string[] { return globalLogger .flushCapturedLogs() - .filter( - (log): log is string => typeof log === 'string' && log.includes('[MCP') - ); + .filter((log): log is string => typeof log === 'string' && log.includes('[MCP')); } async dispose() { @@ -255,15 +227,15 @@ export class GenkitRunner implements LlmRunner { const model = provider.createModel(name); if (model) { - return { provider: provider as GenkitModelProvider, model }; + return {provider: provider as GenkitModelProvider, model}; } } throw new UserFacingError( `Unrecognized model '${name}'. The configured models are:\n` + this.getSupportedModels() - .map((m) => `- ${m}`) - .join('\n') + .map(m => `- ${m}`) + .join('\n'), ); } @@ -285,11 +257,11 @@ export class GenkitRunner implements LlmRunner { throw new UserFacingError( `No LLM providers have been configured. You must set at least one of the ` + `following environment variables:\n` + - environmentVars.map((e) => `- ${e}`).join('\n') + environmentVars.map(e => `- ${e}`).join('\n'), ); } - return genkit({ plugins }); + return genkit({plugins}); } } @@ -301,10 +273,10 @@ async function rateLimitLLMRequest( model: ModelReference, prompt: string | PromptDataForCounting, requestFn: () => Promise, - retryCount = 0 + retryCount = 0, ): Promise { if (typeof prompt === 'string') { - prompt = { messages: [], prompt }; + prompt = {messages: [], prompt}; } provider.rateLimit(prompt, model); @@ -322,16 +294,9 @@ async function rateLimitLLMRequest( throw e; } // Exponential backoff with randomness to avoid retrying at the same times with other requests. - const backoffSeconds = - (25 + 10 * 1.35 ** retryCount++) * (0.8 + Math.random() * 0.4); + const backoffSeconds = (25 + 10 * 1.35 ** retryCount++) * (0.8 + Math.random() * 0.4); await setTimeout(1000 * backoffSeconds); - return rateLimitLLMRequest( - provider, - model, - prompt, - requestFn, - retryCount - ); + return rateLimitLLMRequest(provider, model, prompt, requestFn, retryCount); } } throw e; diff --git a/runner/codegen/genkit/model-provider.ts b/runner/codegen/genkit/model-provider.ts index c126eb7..f16f48f 100644 --- a/runner/codegen/genkit/model-provider.ts +++ b/runner/codegen/genkit/model-provider.ts @@ -1,8 +1,8 @@ -import { ModelReference } from 'genkit'; -import { GenkitPlugin, GenkitPluginV2 } from 'genkit/plugin'; -import { RateLimiter } from 'limiter'; -import { PromptDataMessage } from '../llm-runner.js'; -import { LlmResponseFile } from '../../shared-interfaces.js'; +import {ModelReference} from 'genkit'; +import {GenkitPlugin, GenkitPluginV2} from 'genkit/plugin'; +import {RateLimiter} from 'limiter'; +import {PromptDataMessage} from '../llm-runner.js'; +import {LlmResponseFile} from '../../shared-interfaces.js'; export interface RateLimitConfig { requestPerMinute: RateLimiter; @@ -53,19 +53,11 @@ export abstract class GenkitModelProvider { */ abstract validateGeneratedFiles(files: LlmResponseFile[]): boolean; - protected abstract pluginFactory( - apiKey: string - ): GenkitPlugin | GenkitPluginV2; + protected abstract pluginFactory(apiKey: string): GenkitPlugin | GenkitPluginV2; - abstract getModelSpecificConfig( - opts: { includeThoughts?: boolean }, - modelName: string - ): object; + abstract getModelSpecificConfig(opts: {includeThoughts?: boolean}, modelName: string): object; - async rateLimit( - prompt: PromptDataForCounting, - model: ModelReference - ): Promise { + async rateLimit(prompt: PromptDataForCounting, model: ModelReference): Promise { const config = this.rateLimitConfig[model.name]; if (config) { diff --git a/runner/codegen/genkit/models.ts b/runner/codegen/genkit/models.ts index 1c8f113..beb9da5 100644 --- a/runner/codegen/genkit/models.ts +++ b/runner/codegen/genkit/models.ts @@ -1,7 +1,7 @@ -import { GeminiModelProvider } from './providers/gemini.js'; -import { ClaudeModelProvider } from './providers/claude.js'; -import { OpenAiModelProvider } from './providers/open-ai.js'; -import { GrokModelProvider } from './providers/grok.js'; +import {GeminiModelProvider} from './providers/gemini.js'; +import {ClaudeModelProvider} from './providers/claude.js'; +import {OpenAiModelProvider} from './providers/open-ai.js'; +import {GrokModelProvider} from './providers/grok.js'; export const MODEL_PROVIDERS = [ new GeminiModelProvider(), diff --git a/runner/codegen/genkit/providers/claude.ts b/runner/codegen/genkit/providers/claude.ts index 95a42e0..9456163 100644 --- a/runner/codegen/genkit/providers/claude.ts +++ b/runner/codegen/genkit/providers/claude.ts @@ -1,14 +1,10 @@ -import { Anthropic } from '@anthropic-ai/sdk'; -import { GenkitPlugin } from 'genkit/plugin'; -import { - GenkitModelProvider, - PromptDataForCounting, - RateLimitConfig, -} from '../model-provider.js'; -import { anthropic } from 'genkitx-anthropic'; -import { claude35Haiku, claude4Sonnet } from 'genkitx-anthropic'; -import { lazy } from '../../../utils/lazy-creation.js'; -import { RateLimiter } from 'limiter'; +import {Anthropic} from '@anthropic-ai/sdk'; +import {GenkitPlugin} from 'genkit/plugin'; +import {GenkitModelProvider, PromptDataForCounting, RateLimitConfig} from '../model-provider.js'; +import {anthropic} from 'genkitx-anthropic'; +import {claude35Haiku, claude4Sonnet} from 'genkitx-anthropic'; +import {lazy} from '../../../utils/lazy-creation.js'; +import {RateLimiter} from 'limiter'; export class ClaudeModelProvider extends GenkitModelProvider { readonly apiKeyVariableName = 'ANTHROPIC_API_KEY'; @@ -29,7 +25,7 @@ export class ClaudeModelProvider extends GenkitModelProvider { tokensPerInterval: 40_000 * 0.75, // *0.75 to be more resilient to token count deviations interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side. }), - countTokens: (prompt) => this.countClaudeTokens(prompt), + countTokens: prompt => this.countClaudeTokens(prompt), }, }; @@ -43,16 +39,14 @@ export class ClaudeModelProvider extends GenkitModelProvider { } private anthropicApi = lazy(() => { - return new Anthropic({ apiKey: this.getApiKey() || undefined }); + return new Anthropic({apiKey: this.getApiKey() || undefined}); }); protected pluginFactory(apiKey: string): GenkitPlugin { - return anthropic({ apiKey }); + return anthropic({apiKey}); } - private async countClaudeTokens( - prompt: PromptDataForCounting - ): Promise { + private async countClaudeTokens(prompt: PromptDataForCounting): Promise { const sonnetPrompt: string | Anthropic.Messages.MessageParam[] = []; for (const part of prompt.messages) { for (const c of part.content) { @@ -76,7 +70,7 @@ export class ClaudeModelProvider extends GenkitModelProvider { } const messages: Anthropic.Messages.MessageParam[] = [ ...sonnetPrompt, - { content: prompt.prompt, role: 'user' }, + {content: prompt.prompt, role: 'user'}, ]; return ( diff --git a/runner/codegen/genkit/providers/gemini.ts b/runner/codegen/genkit/providers/gemini.ts index 2300876..30c3713 100644 --- a/runner/codegen/genkit/providers/gemini.ts +++ b/runner/codegen/genkit/providers/gemini.ts @@ -1,21 +1,15 @@ -import { GenkitPlugin } from 'genkit/plugin'; -import { googleAI } from '@genkit-ai/googleai'; -import { - GenkitModelProvider, - PromptDataForCounting, - RateLimitConfig, -} from '../model-provider.js'; -import { lazy } from '../../../utils/lazy-creation.js'; -import { GoogleGenAI, Part } from '@google/genai'; -import { RateLimiter } from 'limiter'; -import { LlmResponseFile } from '../../../shared-interfaces.js'; +import {GenkitPlugin} from 'genkit/plugin'; +import {googleAI} from '@genkit-ai/googleai'; +import {GenkitModelProvider, PromptDataForCounting, RateLimitConfig} from '../model-provider.js'; +import {lazy} from '../../../utils/lazy-creation.js'; +import {GoogleGenAI, Part} from '@google/genai'; +import {RateLimiter} from 'limiter'; +import {LlmResponseFile} from '../../../shared-interfaces.js'; export class GeminiModelProvider extends GenkitModelProvider { readonly apiKeyVariableName = 'GEMINI_API_KEY'; - private geminiAPI = lazy( - () => new GoogleGenAI({ apiKey: this.getApiKey() || undefined }) - ); + private geminiAPI = lazy(() => new GoogleGenAI({apiKey: this.getApiKey() || undefined})); protected models = { 'gemini-2.5-pro': () => googleAI.model('gemini-2.5-pro'), @@ -35,7 +29,7 @@ export class GeminiModelProvider extends GenkitModelProvider { tokensPerInterval: 2_000_000 * 0.75, // *0.75 to be more resilient to token count deviations interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side. }), - countTokens: (prompt) => this.countGeminiTokens(prompt, 'gemini-2.5-pro'), + countTokens: prompt => this.countGeminiTokens(prompt, 'gemini-2.5-pro'), }, // See: https://ai.google.dev/gemini-api/docs/rate-limits#tier-1 // 1000 per minute requests is Gemini Flash's limit right now. @@ -48,8 +42,7 @@ export class GeminiModelProvider extends GenkitModelProvider { tokensPerInterval: 1_000_000 * 0.75, // *0.75 to be more resilient to token count deviations interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side. }), - countTokens: (prompt) => - this.countGeminiTokens(prompt, 'gemini-2.5-flash'), + countTokens: prompt => this.countGeminiTokens(prompt, 'gemini-2.5-flash'), }, 'googleai/gemini-2.5-flash-lite': { // See: https://ai.google.dev/gemini-api/docs/rate-limits#tier-1 @@ -62,38 +55,34 @@ export class GeminiModelProvider extends GenkitModelProvider { tokensPerInterval: 4_000_000 * 0.75, // *0.75 to be more resilient to token count deviations interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side. }), - countTokens: (prompt) => - this.countGeminiTokens(prompt, 'gemini-2.5-flash-lite'), + countTokens: prompt => this.countGeminiTokens(prompt, 'gemini-2.5-flash-lite'), }, }; protected pluginFactory(apiKey: string): GenkitPlugin { - return googleAI({ apiKey }); + return googleAI({apiKey}); } - getModelSpecificConfig(opts: { includeThoughts?: boolean }): object { - return { thinkingConfig: { includeThoughts: opts.includeThoughts } }; + getModelSpecificConfig(opts: {includeThoughts?: boolean}): object { + return {thinkingConfig: {includeThoughts: opts.includeThoughts}}; } validateGeneratedFiles(files: LlmResponseFile[]): boolean { // Gemini responses occasionally get truncated on `class=`. // Consider these cases as invalid so they don't influence the results. - return ( - files.length === 0 || - !files.some((file) => file.code.trim().endsWith('class=')) - ); + return files.length === 0 || !files.some(file => file.code.trim().endsWith('class=')); } private async countGeminiTokens( prompt: PromptDataForCounting, - modelName: string + modelName: string, ): Promise { const contents = [ - ...prompt.messages.map((m) => ({ + ...prompt.messages.map(m => ({ role: m.role, - parts: m.content.map((c) => { + parts: m.content.map(c => { return 'text' in c - ? ({ text: c.text } satisfies Part) + ? ({text: c.text} satisfies Part) : ({ inlineData: { data: c.media.base64PngImage, @@ -102,7 +91,7 @@ export class GeminiModelProvider extends GenkitModelProvider { } satisfies Part); }), })), - { role: 'user', parts: [{ text: prompt.prompt }] }, + {role: 'user', parts: [{text: prompt.prompt}]}, ]; try { diff --git a/runner/codegen/genkit/providers/grok.ts b/runner/codegen/genkit/providers/grok.ts index 890ea3c..f457edd 100644 --- a/runner/codegen/genkit/providers/grok.ts +++ b/runner/codegen/genkit/providers/grok.ts @@ -1,12 +1,8 @@ -import { xAI } from '@genkit-ai/compat-oai/xai'; -import { GenkitPlugin, GenkitPluginV2 } from 'genkit/plugin'; -import { RateLimiter } from 'limiter'; +import {xAI} from '@genkit-ai/compat-oai/xai'; +import {GenkitPlugin, GenkitPluginV2} from 'genkit/plugin'; +import {RateLimiter} from 'limiter'; import fetch from 'node-fetch'; -import { - GenkitModelProvider, - PromptDataForCounting, - RateLimitConfig, -} from '../model-provider.js'; +import {GenkitModelProvider, PromptDataForCounting, RateLimitConfig} from '../model-provider.js'; export class GrokModelProvider extends GenkitModelProvider { readonly apiKeyVariableName = 'XAI_API_KEY'; @@ -16,9 +12,7 @@ export class GrokModelProvider extends GenkitModelProvider { 'grok-code-fast-1': () => xAI.model('grok-code-fast-1'), }; - private async countTokensWithXaiApi( - prompt: PromptDataForCounting - ): Promise { + private async countTokensWithXaiApi(prompt: PromptDataForCounting): Promise { const apiKey = this.getApiKey(); if (!apiKey) { return null; @@ -27,7 +21,7 @@ export class GrokModelProvider extends GenkitModelProvider { try { // Use xAI's tokenize API for accurate token counting const messages = this.genkitPromptToXaiFormat(prompt); - const text = messages.map((m) => `${m.role}: ${m.content}`).join('\n'); + const text = messages.map(m => `${m.role}: ${m.content}`).join('\n'); const response = await fetch('https://api.x.ai/v1/tokenize', { method: 'POST', @@ -35,11 +29,11 @@ export class GrokModelProvider extends GenkitModelProvider { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}`, }, - body: JSON.stringify({ text }), + body: JSON.stringify({text}), }); if (response.ok) { - const data = (await response.json()) as { tokens: unknown[] }; + const data = (await response.json()) as {tokens: unknown[]}; return data.tokens?.length || 0; } return null; @@ -51,7 +45,7 @@ export class GrokModelProvider extends GenkitModelProvider { private async countTokensForModel( _modelName: string, - prompt: PromptDataForCounting + prompt: PromptDataForCounting, ): Promise { const xaiTokenCount = await this.countTokensWithXaiApi(prompt); if (xaiTokenCount !== null) { @@ -71,7 +65,7 @@ export class GrokModelProvider extends GenkitModelProvider { tokensPerInterval: 2_000_000 * 0.75, interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side }), - countTokens: (prompt) => this.countTokensForModel('grok-4', prompt), + countTokens: prompt => this.countTokensForModel('grok-4', prompt), }, 'xai/grok-code-fast-1': { requestPerMinute: new RateLimiter({ @@ -82,13 +76,12 @@ export class GrokModelProvider extends GenkitModelProvider { tokensPerInterval: 2_000_000 * 0.75, interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side }), - countTokens: (prompt) => - this.countTokensForModel('grok-code-fast-1', prompt), + countTokens: prompt => this.countTokensForModel('grok-code-fast-1', prompt), }, }; protected pluginFactory(apiKey: string): GenkitPlugin | GenkitPluginV2 { - return xAI({ apiKey }); + return xAI({apiKey}); } getModelSpecificConfig(): object { @@ -101,9 +94,9 @@ export class GrokModelProvider extends GenkitModelProvider { } private genkitPromptToXaiFormat( - prompt: PromptDataForCounting - ): Array<{ role: string; content: string }> { - const xaiPrompt: Array<{ role: string; content: string }> = []; + prompt: PromptDataForCounting, + ): Array<{role: string; content: string}> { + const xaiPrompt: Array<{role: string; content: string}> = []; for (const part of prompt.messages) { for (const c of part.content) { xaiPrompt.push({ @@ -112,6 +105,6 @@ export class GrokModelProvider extends GenkitModelProvider { }); } } - return [...xaiPrompt, { role: 'user', content: prompt.prompt }]; + return [...xaiPrompt, {role: 'user', content: prompt.prompt}]; } } diff --git a/runner/codegen/genkit/providers/open-ai.ts b/runner/codegen/genkit/providers/open-ai.ts index c3c88b0..0b62ffd 100644 --- a/runner/codegen/genkit/providers/open-ai.ts +++ b/runner/codegen/genkit/providers/open-ai.ts @@ -1,12 +1,8 @@ -import { GenkitPluginV2 } from 'genkit/plugin'; -import { openAI } from '@genkit-ai/compat-oai/openai'; -import { RateLimiter } from 'limiter'; -import { - GenkitModelProvider, - PromptDataForCounting, - RateLimitConfig, -} from '../model-provider.js'; -import { encoding_for_model } from 'tiktoken'; +import {GenkitPluginV2} from 'genkit/plugin'; +import {openAI} from '@genkit-ai/compat-oai/openai'; +import {RateLimiter} from 'limiter'; +import {GenkitModelProvider, PromptDataForCounting, RateLimitConfig} from '../model-provider.js'; +import {encoding_for_model} from 'tiktoken'; export class OpenAiModelProvider extends GenkitModelProvider { readonly apiKeyVariableName = 'OPENAI_API_KEY'; @@ -19,12 +15,12 @@ export class OpenAiModelProvider extends GenkitModelProvider { private countTokensForModel( modelName: Parameters[0], - prompt: PromptDataForCounting + prompt: PromptDataForCounting, ): number { const encoding = encoding_for_model(modelName); try { const messages = this.genkitPromptToOpenAi(prompt); - const text = messages.map((m) => `${m.role}: ${m.content}`).join('\n'); + const text = messages.map(m => `${m.role}: ${m.content}`).join('\n'); const tokens = encoding.encode(text); return tokens.length; } finally { @@ -43,7 +39,7 @@ export class OpenAiModelProvider extends GenkitModelProvider { tokensPerInterval: 30_000 * 0.75, // *0.75 to be more resilient to token count deviations interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side. }), - countTokens: async (prompt) => this.countTokensForModel('gpt-4o', prompt), + countTokens: async prompt => this.countTokensForModel('gpt-4o', prompt), }, // See https://platform.openai.com/docs/models/o4-mini 'openai/o4-mini': { @@ -55,8 +51,7 @@ export class OpenAiModelProvider extends GenkitModelProvider { tokensPerInterval: 100_000 * 0.75, // *0.75 to be more resilient to token count deviations interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side. }), - countTokens: async (prompt) => - this.countTokensForModel('gpt-4o-mini', prompt), + countTokens: async prompt => this.countTokensForModel('gpt-4o-mini', prompt), }, // See: https://platform.openai.com/docs/models/gpt-5 'openai/gpt-5': { @@ -68,12 +63,12 @@ export class OpenAiModelProvider extends GenkitModelProvider { tokensPerInterval: 30_000 * 0.75, // *0.75 to be more resilient to token count deviations interval: 1000 * 60 * 1.5, // Refresh tokens after 1.5 minutes to be on the safe side. }), - countTokens: async (prompt) => this.countTokensForModel('gpt-5', prompt), + countTokens: async prompt => this.countTokensForModel('gpt-5', prompt), }, }; protected pluginFactory(apiKey: string): GenkitPluginV2 { - return openAI({ apiKey, maxRetries: 0 }); + return openAI({apiKey, maxRetries: 0}); } getModelSpecificConfig(): object { @@ -86,9 +81,9 @@ export class OpenAiModelProvider extends GenkitModelProvider { } private genkitPromptToOpenAi( - prompt: PromptDataForCounting - ): Array<{ role: string; content: string }> { - const openAiPrompt: Array<{ role: string; content: string }> = []; + prompt: PromptDataForCounting, + ): Array<{role: string; content: string}> { + const openAiPrompt: Array<{role: string; content: string}> = []; for (const part of prompt.messages) { for (const c of part.content) { openAiPrompt.push({ @@ -97,6 +92,6 @@ export class OpenAiModelProvider extends GenkitModelProvider { }); } } - return [...openAiPrompt, { role: 'user', content: prompt.prompt }]; + return [...openAiPrompt, {role: 'user', content: prompt.prompt}]; } } diff --git a/runner/codegen/llm-runner.ts b/runner/codegen/llm-runner.ts index 69a7205..fd75644 100644 --- a/runner/codegen/llm-runner.ts +++ b/runner/codegen/llm-runner.ts @@ -1,12 +1,12 @@ -import { z } from 'zod'; -import { LlmResponseFile, ToolLogEntry, Usage } from '../shared-interfaces.js'; -import { UserFacingError } from '../utils/errors.js'; +import {z} from 'zod'; +import {LlmResponseFile, ToolLogEntry, Usage} from '../shared-interfaces.js'; +import {UserFacingError} from '../utils/errors.js'; export function assertValidModelName(value: string, availableModels: string[]) { if (!availableModels.includes(value)) { throw new UserFacingError( `Unsupported model specified. Available models:\n` + - availableModels.map((m) => `- ${m}`).join('\n') + availableModels.map(m => `- ${m}`).join('\n'), ); } } @@ -28,18 +28,14 @@ export interface LlmRunner { readonly hasBuiltInRepairLoop: boolean; /** Sends a file generation request to the LLM. */ - generateFiles( - options: LlmGenerateFilesRequestOptions - ): Promise; + generateFiles(options: LlmGenerateFilesRequestOptions): Promise; /** Sends a normal text generation request to the LLM. */ - generateText( - options: LlmGenerateTextRequestOptions - ): Promise; + generateText(options: LlmGenerateTextRequestOptions): Promise; /** Sends a schema-constrained generation request to the LLM. */ generateConstrained( - options: LlmConstrainedOutputGenerateRequestOptions + options: LlmConstrainedOutputGenerateRequestOptions, ): Promise>; /** Gets the names of the models supported by the runner. */ @@ -130,9 +126,8 @@ export interface LlmGenerateFilesRequestOptions extends BaseLlmRequestOptions { * Options that can be passed for a schema-constrained generation * request to an LLM. */ -export interface LlmConstrainedOutputGenerateRequestOptions< - T extends z.ZodTypeAny = z.ZodTypeAny, -> extends BaseLlmRequestOptions { +export interface LlmConstrainedOutputGenerateRequestOptions + extends BaseLlmRequestOptions { /** Prompt to send. */ prompt: string; /** Schema that the response should conform to. */ @@ -140,9 +135,7 @@ export interface LlmConstrainedOutputGenerateRequestOptions< } /** Constrained output response by the LLM. */ -export interface LlmConstrainedOutputGenerateResponse< - T extends z.ZodTypeAny = z.ZodTypeAny, -> { +export interface LlmConstrainedOutputGenerateResponse { /** Result generated by the LLM. */ output: z.infer | null; /** Token usage data, if available. */ @@ -194,7 +187,5 @@ export type McpServerOptions = z.infer; * */ export interface PromptDataMessage { role: 'user'; - content: Array< - { text: string } | { media: { url: string; base64PngImage: string } } - >; + content: Array<{text: string} | {media: {url: string; base64PngImage: string}}>; } diff --git a/runner/codegen/runner-creation.ts b/runner/codegen/runner-creation.ts index 8d78716..046b5fe 100644 --- a/runner/codegen/runner-creation.ts +++ b/runner/codegen/runner-creation.ts @@ -1,6 +1,6 @@ -import { UserFacingError } from '../utils/errors.js'; -import type { GeminiCliRunner } from './gemini-cli/gemini-cli-runner.js'; -import type { GenkitRunner } from './genkit/genkit-runner.js'; +import {UserFacingError} from '../utils/errors.js'; +import type {GeminiCliRunner} from './gemini-cli/gemini-cli-runner.js'; +import type {GenkitRunner} from './genkit/genkit-runner.js'; interface AvailableRunners { genkit: GenkitRunner; @@ -11,9 +11,7 @@ interface AvailableRunners { export type RunnerName = keyof AvailableRunners; /** Creates an `LlmRunner` based on a name. */ -export async function getRunnerByName( - name: T -): Promise { +export async function getRunnerByName(name: T): Promise { // Note that we lazily import and resolve the runners here, because their imports // might have side effects. E.g. Genkit installs a listener on the process exiting // in order to kill pending instances and log "Closing all Genkit instances". @@ -21,11 +19,11 @@ export async function getRunnerByName( switch (name) { case 'genkit': return import('./genkit/genkit-runner.js').then( - (m) => new m.GenkitRunner() as AvailableRunners[T] + m => new m.GenkitRunner() as AvailableRunners[T], ); case 'gemini-cli': return import('./gemini-cli/gemini-cli-runner.js').then( - (m) => new m.GeminiCliRunner() as AvailableRunners[T] + m => new m.GeminiCliRunner() as AvailableRunners[T], ); default: throw new UserFacingError(`Unsupported runner ${name}`); diff --git a/runner/configuration/base-environment-config.ts b/runner/configuration/base-environment-config.ts index 1673931..fe311ae 100644 --- a/runner/configuration/base-environment-config.ts +++ b/runner/configuration/base-environment-config.ts @@ -1,8 +1,8 @@ import z from 'zod'; -import { ratingSchema } from '../ratings/rating-types.js'; -import { MultiStepPrompt } from './multi-step-prompt.js'; -import { mcpServerOptionsSchema } from '../codegen/llm-runner.js'; -import { getPossiblePackageManagers } from './environment-config.js'; +import {ratingSchema} from '../ratings/rating-types.js'; +import {MultiStepPrompt} from './multi-step-prompt.js'; +import {mcpServerOptionsSchema} from '../codegen/llm-runner.js'; +import {getPossiblePackageManagers} from './environment-config.js'; export const baseEnvironmentConfigSchema = z.strictObject({ /** Display name for the environment. */ @@ -40,8 +40,8 @@ export const baseEnvironmentConfigSchema = z.strictObject({ name: z.string().optional(), ratings: z.array(ratingSchema).optional(), }), - z.custom((data) => data instanceof MultiStepPrompt), - ]) + z.custom(data => data instanceof MultiStepPrompt), + ]), ), /** * ID of the fullstack framework used within the environment. diff --git a/runner/configuration/base-environment.ts b/runner/configuration/base-environment.ts index 9016541..64adb80 100644 --- a/runner/configuration/base-environment.ts +++ b/runner/configuration/base-environment.ts @@ -1,21 +1,21 @@ -import { readdirSync, readFileSync, statSync } from 'fs'; -import { basename, dirname, extname, join, resolve } from 'path'; -import { globSync } from 'tinyglobby'; +import {readdirSync, readFileSync, statSync} from 'fs'; +import {basename, dirname, extname, join, resolve} from 'path'; +import {globSync} from 'tinyglobby'; import { FrameworkInfo, MultiStepPromptDefinition, PromptDefinition, RootPromptDefinition, } from '../shared-interfaces.js'; -import { Rating } from '../ratings/rating-types.js'; -import { renderHandlebarsTemplate } from './prompt-templating.js'; -import { lazy } from '../utils/lazy-creation.js'; -import { EnvironmentConfig } from './environment-config.js'; -import { MultiStepPrompt } from './multi-step-prompt.js'; -import { UserFacingError } from '../utils/errors.js'; -import { generateId } from '../utils/id-generation.js'; -import { Gateway } from '../orchestration/gateway.js'; -import { Environment } from './environment.js'; +import {Rating} from '../ratings/rating-types.js'; +import {renderHandlebarsTemplate} from './prompt-templating.js'; +import {lazy} from '../utils/lazy-creation.js'; +import {EnvironmentConfig} from './environment-config.js'; +import {MultiStepPrompt} from './multi-step-prompt.js'; +import {UserFacingError} from '../utils/errors.js'; +import {generateId} from '../utils/id-generation.js'; +import {Gateway} from '../orchestration/gateway.js'; +import {Environment} from './environment.js'; /** Represents a single prompt evaluation environment. */ export abstract class BaseEnvironment { @@ -42,7 +42,7 @@ export abstract class BaseEnvironment { constructor( rootPath: string, - private readonly config: EnvironmentConfig + private readonly config: EnvironmentConfig, ) { this.rootPath = rootPath; this.id = config.id || this.generateId(config.displayName); @@ -50,20 +50,18 @@ export abstract class BaseEnvironment { this.clientSideFramework = { id: config.clientSideFramework, displayName: - this.getFrameworkDisplayName(config.clientSideFramework) || - config.clientSideFramework, + this.getFrameworkDisplayName(config.clientSideFramework) || config.clientSideFramework, }; this.fullStackFramework = config.fullStackFramework ? { id: config.fullStackFramework, displayName: - this.getFrameworkDisplayName(config.fullStackFramework) || - config.clientSideFramework, + this.getFrameworkDisplayName(config.fullStackFramework) || config.clientSideFramework, } - : { ...this.clientSideFramework }; + : {...this.clientSideFramework}; this.executablePrompts = this.resolveExecutablePrompts( config.executablePrompts, - config.ratings + config.ratings, ); this.codeRatingPromptPath = config.codeRatingPrompt ? join(rootPath, config.codeRatingPrompt) @@ -98,7 +96,7 @@ export abstract class BaseEnvironment { async getPrompt( type: 'generation' | 'editing', userPrompt: string, - ragEndpoint?: string + ragEndpoint?: string, ): Promise { const systemPrompt = type === 'generation' @@ -110,16 +108,12 @@ export abstract class BaseEnvironment { } if (!ragEndpoint.includes('PROMPT')) { - throw new UserFacingError( - 'The ragEndpoint must include the "PROMPT" substring.' - ); + throw new UserFacingError('The ragEndpoint must include the "PROMPT" substring.'); } const url = ragEndpoint.replace('PROMPT', encodeURIComponent(userPrompt)); const response = await fetch(url); if (!response.ok) { - throw new UserFacingError( - `Failed to fetch from ${url}: ${response.statusText}` - ); + throw new UserFacingError(`Failed to fetch from ${url}: ${response.statusText}`); } const ragContent = await response.text(); return `${systemPrompt}\n\n${ragContent}`; @@ -135,7 +129,7 @@ export abstract class BaseEnvironment { renderPrompt( content: string, promptFilePath: string | null, - additionalContext: Record = {} + additionalContext: Record = {}, ) { return renderHandlebarsTemplate(content, { rootDir: promptFilePath ? dirname(promptFilePath) : null, @@ -176,7 +170,7 @@ export abstract class BaseEnvironment { */ private resolveExecutablePrompts( prompts: EnvironmentConfig['executablePrompts'], - envRatings: Rating[] + envRatings: Rating[], ) { const result: RootPromptDefinition[] = []; @@ -197,14 +191,14 @@ export abstract class BaseEnvironment { name = def.name; } - globSync(path, { cwd: this.rootPath }).forEach((relativePath) => { + globSync(path, {cwd: this.rootPath}).forEach(relativePath => { result.push( this.getStepPromptDefinition( name ?? basename(relativePath, extname(relativePath)), relativePath, ratings, - /* isEditing */ false - ) + /* isEditing */ false, + ), ); }); } @@ -226,9 +220,9 @@ export abstract class BaseEnvironment { name: string, relativePath: string, ratings: Rating[], - isEditing: boolean + isEditing: boolean, ): PromptDefinition { - const { result, contextFiles } = this.renderRelativePrompt(relativePath); + const {result, contextFiles} = this.renderRelativePrompt(relativePath); return { name: name, @@ -248,7 +242,7 @@ export abstract class BaseEnvironment { */ private getMultiStepPrompt( def: MultiStepPrompt, - envRatings: Rating[] + envRatings: Rating[], ): MultiStepPromptDefinition { const promptRoot = resolve(this.rootPath, def.directoryPath); const name = basename(promptRoot); @@ -258,11 +252,11 @@ export abstract class BaseEnvironment { if (!statSync(promptRoot).isDirectory()) { throw new UserFacingError( - `Multi-step prompt root must point to a directory. "${promptRoot}" is not a directory.` + `Multi-step prompt root must point to a directory. "${promptRoot}" is not a directory.`, ); } - const entities = readdirSync(promptRoot, { withFileTypes: true }); + const entities = readdirSync(promptRoot, {withFileTypes: true}); if (entities.length === 0) { throw new UserFacingError('Multi-step prompt directory cannot be empty.'); @@ -271,7 +265,7 @@ export abstract class BaseEnvironment { for (const current of entities) { if (!current.isFile()) { throw new UserFacingError( - `Multi-step prompt directory can only contain files. ${current.name} is not a file.` + `Multi-step prompt directory can only contain files. ${current.name} is not a file.`, ); } @@ -280,7 +274,7 @@ export abstract class BaseEnvironment { if (!match || !match[1]) { throw new UserFacingError( `Multi-step prompt name must be in the form of \`step-\`, ` + - `but received '${current.name}'` + `but received '${current.name}'`, ); } @@ -298,7 +292,7 @@ export abstract class BaseEnvironment { `${name}-step-${stepNum}`, join(def.directoryPath, current.name), ratings, - /*isEditing */ stepNum !== 1 + /*isEditing */ stepNum !== 1, ); stepValues[step.name] = stepNum; @@ -316,9 +310,7 @@ export abstract class BaseEnvironment { const id = generateId(displayName); if (id === null) { - throw new UserFacingError( - `Could not auto-generate an ID from "${displayName}"` - ); + throw new UserFacingError(`Could not auto-generate an ID from "${displayName}"`); } return id; diff --git a/runner/configuration/constants.ts b/runner/configuration/constants.ts index d255ecd..dd83f1f 100644 --- a/runner/configuration/constants.ts +++ b/runner/configuration/constants.ts @@ -1,4 +1,4 @@ -import { join } from 'path'; +import {join} from 'path'; // Extracted out for convenience, do NOT export. const rootDir = join(process.cwd(), '.web-codegen-scorer'); @@ -37,12 +37,6 @@ export const REPORT_VERSION = 3; /** Environments that are shipped together with the eval tool. */ export const BUILT_IN_ENVIRONMENTS = new Map([ - [ - 'angular-example', - join(import.meta.dirname, '../../examples/environments/angular/config.js'), - ], - [ - 'solid-example', - join(import.meta.dirname, '../../examples/environments/solid/config.js'), - ], + ['angular-example', join(import.meta.dirname, '../../examples/environments/angular/config.js')], + ['solid-example', join(import.meta.dirname, '../../examples/environments/solid/config.js')], ]); diff --git a/runner/configuration/environment-config.ts b/runner/configuration/environment-config.ts index ab6c617..e15361e 100644 --- a/runner/configuration/environment-config.ts +++ b/runner/configuration/environment-config.ts @@ -1,14 +1,8 @@ import z from 'zod'; -import { createMessageBuilder, fromError } from 'zod-validation-error/v3'; -import { UserFacingError } from '../utils/errors.js'; -import { - LocalEnvironmentConfig, - localEnvironmentConfigSchema, -} from './environment-local.js'; -import { - RemoteEnvironmentConfig, - remoteEnvironmentConfigSchema, -} from './environment-remote.js'; +import {createMessageBuilder, fromError} from 'zod-validation-error/v3'; +import {UserFacingError} from '../utils/errors.js'; +import {LocalEnvironmentConfig, localEnvironmentConfigSchema} from './environment-local.js'; +import {RemoteEnvironmentConfig, remoteEnvironmentConfigSchema} from './environment-remote.js'; const environmentConfigSchema = z.union([ localEnvironmentConfigSchema, @@ -27,9 +21,7 @@ export function getPossiblePackageManagers() { } /** Asserts that the specified data is a valid environment config. */ -export function assertIsEnvironmentConfig( - value: unknown -): asserts value is EnvironmentConfig { +export function assertIsEnvironmentConfig(value: unknown): asserts value is EnvironmentConfig { const validationResult = environmentConfigSchema.safeParse(value); if (!validationResult.success) { @@ -48,7 +40,7 @@ export function assertIsEnvironmentConfig( } export function isLocalEnvironmentConfig( - config: EnvironmentConfig + config: EnvironmentConfig, ): config is LocalEnvironmentConfig { return (config as Partial).gateway === undefined; } diff --git a/runner/configuration/environment-local.ts b/runner/configuration/environment-local.ts index 232b0e0..9eefed2 100644 --- a/runner/configuration/environment-local.ts +++ b/runner/configuration/environment-local.ts @@ -1,17 +1,10 @@ -import { join } from 'path'; +import {join} from 'path'; import z from 'zod'; -import { - LlmRunner, - McpServerOptions, - mcpServerOptionsSchema, -} from '../codegen/llm-runner.js'; -import { LocalGateway } from '../orchestration/gateways/local_gateway.js'; -import { BaseEnvironment } from './base-environment.js'; -import { - EnvironmentConfig, - getPossiblePackageManagers, -} from './environment-config.js'; -import { baseEnvironmentConfigSchema } from './base-environment-config.js'; +import {LlmRunner, McpServerOptions, mcpServerOptionsSchema} from '../codegen/llm-runner.js'; +import {LocalGateway} from '../orchestration/gateways/local_gateway.js'; +import {BaseEnvironment} from './base-environment.js'; +import {EnvironmentConfig, getPossiblePackageManagers} from './environment-config.js'; +import {baseEnvironmentConfigSchema} from './base-environment-config.js'; export const localEnvironmentConfigSchema = baseEnvironmentConfigSchema.extend({ /** MCP servers that can be started for this environment. */ @@ -42,9 +35,7 @@ export const localEnvironmentConfigSchema = baseEnvironmentConfigSchema.extend({ skipInstall: z.boolean().optional(), }); -export type LocalEnvironmentConfig = z.infer< - typeof localEnvironmentConfigSchema ->; +export type LocalEnvironmentConfig = z.infer; /** Represents a single prompt evaluation environment. */ export class LocalEnvironment extends BaseEnvironment { @@ -76,7 +67,7 @@ export class LocalEnvironment extends BaseEnvironment { constructor( rootPath: string, config: LocalEnvironmentConfig, - readonly llm: LlmRunner + readonly llm: LlmRunner, ) { super(rootPath, config); @@ -86,23 +77,18 @@ export class LocalEnvironment extends BaseEnvironment { const projectTemplatePath = config.projectTemplate ? join(rootPath, config.projectTemplate) : null; - const sourceDirectory = config.sourceDirectory - ? join(rootPath, config.sourceDirectory) - : null; + const sourceDirectory = config.sourceDirectory ? join(rootPath, config.sourceDirectory) : null; this.packageManager = packageManager; this.installCommand = `${packageManager} install --silent`; this.buildCommand = config.buildCommand || `${packageManager} run build`; - this.serveCommand = - config.serveCommand || this.getDefaultServeCommand(packageManager); + this.serveCommand = config.serveCommand || this.getDefaultServeCommand(packageManager); this.projectTemplatePath = projectTemplatePath; this.sourceDirectory = sourceDirectory; this.mcpServerOptions = config.mcpServers || []; this.skipInstall = config.skipInstall ?? false; } - private getDefaultServeCommand( - packageManager: LocalEnvironmentConfig['packageManager'] - ): string { + private getDefaultServeCommand(packageManager: LocalEnvironmentConfig['packageManager']): string { const flags = '--port 0'; // npm needs -- to pass flags to the command. diff --git a/runner/configuration/environment-remote.ts b/runner/configuration/environment-remote.ts index cb31440..f64dad0 100644 --- a/runner/configuration/environment-remote.ts +++ b/runner/configuration/environment-remote.ts @@ -1,18 +1,14 @@ import z from 'zod'; -import { Gateway } from '../orchestration/gateway.js'; -import { BaseEnvironment } from './base-environment.js'; -import { baseEnvironmentConfigSchema } from './base-environment-config.js'; +import {Gateway} from '../orchestration/gateway.js'; +import {BaseEnvironment} from './base-environment.js'; +import {baseEnvironmentConfigSchema} from './base-environment-config.js'; -export const remoteEnvironmentConfigSchema = baseEnvironmentConfigSchema.extend( - { - // TODO: Follow-up with a gateway validator, or make class abstract. - gateway: z.custom>(), - } -); +export const remoteEnvironmentConfigSchema = baseEnvironmentConfigSchema.extend({ + // TODO: Follow-up with a gateway validator, or make class abstract. + gateway: z.custom>(), +}); -export type RemoteEnvironmentConfig = z.infer< - typeof remoteEnvironmentConfigSchema ->; +export type RemoteEnvironmentConfig = z.infer; /** Represents a single prompt evaluation environment. */ export class RemoteEnvironment extends BaseEnvironment { diff --git a/runner/configuration/environment-resolution.ts b/runner/configuration/environment-resolution.ts index 9c972e0..52a94c9 100644 --- a/runner/configuration/environment-resolution.ts +++ b/runner/configuration/environment-resolution.ts @@ -1,22 +1,19 @@ -import { dirname } from 'path'; -import { existsSync } from 'fs'; -import { - assertIsEnvironmentConfig, - isLocalEnvironmentConfig, -} from './environment-config.js'; -import { toProcessAbsolutePath } from '../file-system-utils.js'; -import { UserFacingError } from '../utils/errors.js'; -import { Environment } from './environment.js'; -import { LocalEnvironment } from './environment-local.js'; -import { RemoteEnvironment } from './environment-remote.js'; -import { getRunnerByName, RunnerName } from '../codegen/runner-creation.js'; +import {dirname} from 'path'; +import {existsSync} from 'fs'; +import {assertIsEnvironmentConfig, isLocalEnvironmentConfig} from './environment-config.js'; +import {toProcessAbsolutePath} from '../file-system-utils.js'; +import {UserFacingError} from '../utils/errors.js'; +import {Environment} from './environment.js'; +import {LocalEnvironment} from './environment-local.js'; +import {RemoteEnvironment} from './environment-remote.js'; +import {getRunnerByName, RunnerName} from '../codegen/runner-creation.js'; const environmentsCache = new Map(); /** Gets an environment with a specific config path. */ export async function getEnvironmentByPath( configPath: string, - runnerCliOption: RunnerName + runnerCliOption: RunnerName, ): Promise { configPath = toProcessAbsolutePath(configPath); @@ -25,20 +22,14 @@ export async function getEnvironmentByPath( } if (!existsSync(configPath)) { - throw new UserFacingError( - `Cannot find environment config file at ${configPath}` - ); + throw new UserFacingError(`Cannot find environment config file at ${configPath}`); } - const result: { default: unknown } = await import(configPath); + const result: {default: unknown} = await import(configPath); const rootPath = dirname(configPath); assertIsEnvironmentConfig(result.default); const environment = isLocalEnvironmentConfig(result.default) - ? new LocalEnvironment( - rootPath, - result.default, - await getRunnerByName(runnerCliOption) - ) + ? new LocalEnvironment(rootPath, result.default, await getRunnerByName(runnerCliOption)) : new RemoteEnvironment(rootPath, result.default); environmentsCache.set(configPath, environment); diff --git a/runner/configuration/environment.ts b/runner/configuration/environment.ts index 7f548a4..d8ef952 100644 --- a/runner/configuration/environment.ts +++ b/runner/configuration/environment.ts @@ -1,4 +1,4 @@ -import { LocalEnvironment } from './environment-local.js'; -import { RemoteEnvironment } from './environment-remote.js'; +import {LocalEnvironment} from './environment-local.js'; +import {RemoteEnvironment} from './environment-remote.js'; export type Environment = LocalEnvironment | RemoteEnvironment; diff --git a/runner/configuration/multi-step-prompt.ts b/runner/configuration/multi-step-prompt.ts index 99a031e..71ab55c 100644 --- a/runner/configuration/multi-step-prompt.ts +++ b/runner/configuration/multi-step-prompt.ts @@ -1,9 +1,9 @@ -import { Rating } from '../ratings/rating-types.js'; +import {Rating} from '../ratings/rating-types.js'; /** Definition of a multi-step prompt. */ export class MultiStepPrompt { constructor( readonly directoryPath: string, - readonly stepRatings: Record = {} + readonly stepRatings: Record = {}, ) {} } diff --git a/runner/configuration/prompt-templating.ts b/runner/configuration/prompt-templating.ts index 41d452b..7ecaae9 100644 --- a/runner/configuration/prompt-templating.ts +++ b/runner/configuration/prompt-templating.ts @@ -1,57 +1,52 @@ import Handlebars from 'handlebars'; -import { readFileSync } from 'fs'; +import {readFileSync} from 'fs'; import path from 'path'; -import { UserFacingError } from '../utils/errors.js'; +import {UserFacingError} from '../utils/errors.js'; function initializeHandlebars() { Handlebars.registerHelper('neq', (a, b) => a !== b); - Handlebars.registerPartial( - 'embed', - (ctx: { rootDir: string | null; file?: string }) => { - if (!ctx.file) { - throw new UserFacingError('file= is required'); - } - if (!ctx.rootDir) { - throw new UserFacingError( - 'Cannot use `embed` if a rootDir is not specified' - ); - } + Handlebars.registerPartial('embed', (ctx: {rootDir: string | null; file?: string}) => { + if (!ctx.file) { + throw new UserFacingError('file= is required'); + } + if (!ctx.rootDir) { + throw new UserFacingError('Cannot use `embed` if a rootDir is not specified'); + } - const fullPath = path.join(ctx.rootDir, ctx.file); - const content = readFileSync(fullPath, 'utf8'); + const fullPath = path.join(ctx.rootDir, ctx.file); + const content = readFileSync(fullPath, 'utf8'); - // Recursively support `embed`. - return Handlebars.compile(content, { strict: true })({ - ...ctx, - rootDir: path.dirname(fullPath), - }); - } - ); + // Recursively support `embed`. + return Handlebars.compile(content, {strict: true})({ + ...ctx, + rootDir: path.dirname(fullPath), + }); + }); } initializeHandlebars(); /** Renders the given content via Handlebars. */ -export function renderHandlebarsTemplate( +export function renderHandlebarsTemplate( content: string, - ctx: T + ctx: T, ) { - const template = Handlebars.compile(content, { strict: true }); + const template = Handlebars.compile(content, {strict: true}); const contextFiles: string[] = []; const result = template(ctx, { partials: { - contextFiles: (ctx) => { + contextFiles: ctx => { if (typeof ctx !== 'string') { throw new UserFacingError( '`contextFiles` must receive a comma-separated list of file patterns, ' + - "for example: `{{> contextFiles '**/*.ts, **/*.css, **/*.html' }}`" + "for example: `{{> contextFiles '**/*.ts, **/*.css, **/*.html' }}`", ); } if (contextFiles.length > 0) { throw new UserFacingError( 'There can be only one usage of `contextFiles` per prompt. ' + - 'Combine your usages into a single comma-separated string.' + 'Combine your usages into a single comma-separated string.', ); } @@ -59,7 +54,7 @@ export function renderHandlebarsTemplate( ...ctx .trim() .split(',') - .map((p) => p.trim()) + .map(p => p.trim()), ); if (contextFiles.length === 0) { diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts index f9be502..21259a2 100644 --- a/runner/eval-cli.ts +++ b/runner/eval-cli.ts @@ -1,17 +1,14 @@ -import { Arguments, Argv, CommandModule } from 'yargs'; +import {Arguments, Argv, CommandModule} from 'yargs'; import chalk from 'chalk'; import { BUILT_IN_ENVIRONMENTS, DEFAULT_AUTORATER_MODEL_NAME, DEFAULT_MODEL_NAME, } from './configuration/constants.js'; -import { generateCodeAndAssess } from './orchestration/generate.js'; -import { - logReportToConsole, - writeReportToDisk, -} from './reporting/report-logging.js'; -import { RunnerName } from './codegen/runner-creation.js'; -import { UserFacingError } from './utils/errors.js'; +import {generateCodeAndAssess} from './orchestration/generate.js'; +import {logReportToConsole, writeReportToDisk} from './reporting/report-logging.js'; +import {RunnerName} from './codegen/runner-creation.js'; +import {UserFacingError} from './utils/errors.js'; export const EvalModule = { builder, @@ -66,8 +63,7 @@ function builder(argv: Argv): Argv { .option('local', { type: 'boolean', default: false, - description: - 'Whether to run the evaluation against locally-cached LLM output', + description: 'Whether to run the evaluation against locally-cached LLM output', }) .option('limit', { type: 'number', @@ -77,14 +73,13 @@ function builder(argv: Argv): Argv { .option('concurrency', { type: 'string', default: 'auto', - coerce: (v) => (v === 'auto' ? 'auto' : Number(v)), + coerce: v => (v === 'auto' ? 'auto' : Number(v)), description: 'Maximum number of evaluations to run concurrently', }) .option('output-directory', { type: 'string', alias: ['output-dir'], - description: - 'Directory in which to output the generated code for debugging', + description: 'Directory in which to output the generated code for debugging', }) .option('prompt-filter', { type: 'string', @@ -114,10 +109,7 @@ function builder(argv: Argv): Argv { }) .option('logging', { type: 'string', - default: - process.env['CI'] === '1' - ? ('text-only' as const) - : ('dynamic' as const), + default: process.env['CI'] === '1' ? ('text-only' as const) : ('dynamic' as const), defaultDescription: '`dynamic` (or `text-only` when `CI=1`)', requiresArg: true, choices: ['text-only', 'dynamic'] as const, @@ -143,8 +135,7 @@ function builder(argv: Argv): Argv { type: 'boolean', default: false, alias: ['user-journeys'], - description: - 'Whether to enable user journey testing through browser automation', + description: 'Whether to enable user journey testing through browser automation', }) .option('enable-auto-csp', { type: 'boolean', @@ -178,8 +169,8 @@ async function handler(cliArgs: Arguments): Promise { ' - Pass a path to an environment config file using the `--env` flag.', ' - Pass `--env=angular-example` or `--env=solid-example` to use one of our built-in example environments.', ' - Pass `--help` to see all available options.', - ].join('\n') - ) + ].join('\n'), + ), ); process.exit(0); } @@ -188,8 +179,7 @@ async function handler(cliArgs: Arguments): Promise { const runInfo = await generateCodeAndAssess({ runner: cliArgs.runner, model: cliArgs.model, - environmentConfigPath: - BUILT_IN_ENVIRONMENTS.get(cliArgs.environment) || cliArgs.environment, + environmentConfigPath: BUILT_IN_ENVIRONMENTS.get(cliArgs.environment) || cliArgs.environment, localMode: cliArgs.local, limit: cliArgs.limit, concurrency: cliArgs.concurrency as number, @@ -215,9 +205,7 @@ async function handler(cliArgs: Arguments): Promise { if (error instanceof UserFacingError) { console.error(chalk.red(error.message)); } else { - console.error( - chalk.red('An error occurred during the assessment process:') - ); + console.error(chalk.red('An error occurred during the assessment process:')); console.error(chalk.red(error)); if ((error as Partial).stack) { console.error(chalk.red((error as Error).stack)); diff --git a/runner/file-system-utils.ts b/runner/file-system-utils.ts index c15a04b..36997af 100644 --- a/runner/file-system-utils.ts +++ b/runner/file-system-utils.ts @@ -1,15 +1,6 @@ -import { join, dirname, isAbsolute } from 'path'; -import { existsSync } from 'fs'; -import { - mkdir, - copyFile, - readdir, - writeFile, - lstat, - symlink, - rm, - unlink, -} from 'fs/promises'; +import {join, dirname, isAbsolute} from 'path'; +import {existsSync} from 'fs'; +import {mkdir, copyFile, readdir, writeFile, lstat, symlink, rm, unlink} from 'fs/promises'; /** * Recursively copies a folder from a source path to a destination path, @@ -19,13 +10,9 @@ import { * @param destination The path to the destination folder. * @param exclude An optional set of directory names to exclude from copying. */ -export async function copyFolderExcept( - source: string, - destination: string, - exclude?: Set -) { - await mkdir(destination, { recursive: true }); - const entries = await readdir(source, { withFileTypes: true }); +export async function copyFolderExcept(source: string, destination: string, exclude?: Set) { + await mkdir(destination, {recursive: true}); + const entries = await readdir(source, {withFileTypes: true}); for (const entry of entries) { const sourcePath = join(source, entry.name); @@ -46,7 +33,7 @@ export async function copyFolderExcept( * inside symlinked directories. */ export async function removeFolderWithSymlinks(dir: string) { - const entries = await readdir(dir, { withFileTypes: true }); + const entries = await readdir(dir, {withFileTypes: true}); for (const entry of entries) { const entryPath = join(dir, entry.name); @@ -66,12 +53,12 @@ export async function removeFolderWithSymlinks(dir: string) { export async function safeWriteFile( filePath: string, content: string | Buffer, - encoding?: BufferEncoding + encoding?: BufferEncoding, ): Promise { const directory = dirname(filePath); if (!existsSync(directory)) { - await mkdir(directory, { recursive: true }); + await mkdir(directory, {recursive: true}); } await writeFile(filePath, content, encoding); @@ -88,7 +75,7 @@ export async function safeWriteFile( */ export async function createSymlinkIfNotExists( sourcePath: string, - targetPath: string + targetPath: string, ): Promise { try { await lstat(targetPath); diff --git a/runner/index.ts b/runner/index.ts index dd01663..66a12f4 100644 --- a/runner/index.ts +++ b/runner/index.ts @@ -5,37 +5,28 @@ export { type RemoteEnvironmentConfig, RemoteEnvironment, } from './configuration/environment-remote.js'; -export { - type LocalEnvironmentConfig, - LocalEnvironment, -} from './configuration/environment-local.js'; +export {type LocalEnvironmentConfig, LocalEnvironment} from './configuration/environment-local.js'; export * from './ratings/built-in.js'; export * from './ratings/rating-types.js'; export * from './ratings/built-in-ratings/index.js'; -export { - calculateBuildAndCheckStats, - isPositiveScore, -} from './ratings/stats.js'; -export { MultiStepPrompt } from './configuration/multi-step-prompt.js'; +export {calculateBuildAndCheckStats, isPositiveScore} from './ratings/stats.js'; +export {MultiStepPrompt} from './configuration/multi-step-prompt.js'; export { BuildErrorType, BuildResultStatus, type BuildResult, } from './workers/builder/builder-types.js'; -export { type UserJourneysResult } from './orchestration/user-journeys.js'; -export { type AutoRateResult } from './ratings/autoraters/auto-rate-shared.js'; -export { type McpServerOptions } from './codegen/llm-runner.js'; -export { - DEFAULT_MODEL_NAME, - REPORT_VERSION, -} from './configuration/constants.js'; -export { generateCodeAndAssess } from './orchestration/generate.js'; -export { groupSimilarReports } from './orchestration/grouping.js'; -export { type LlmRunner } from './codegen/llm-runner.js'; -export { GenkitRunner } from './codegen/genkit/genkit-runner.js'; -export { GeminiCliRunner } from './codegen/gemini-cli/gemini-cli-runner.js'; -export { getRunnerByName, type RunnerName } from './codegen/runner-creation.js'; -export { getEnvironmentByPath } from './configuration/environment-resolution.js'; -export { type Environment } from './configuration/environment.js'; -export { autoRateFiles } from './ratings/autoraters/rate-files.js'; -export { fetchReportsFromDisk } from './reporting/report-local-disk.js'; +export {type UserJourneysResult} from './orchestration/user-journeys.js'; +export {type AutoRateResult} from './ratings/autoraters/auto-rate-shared.js'; +export {type McpServerOptions} from './codegen/llm-runner.js'; +export {DEFAULT_MODEL_NAME, REPORT_VERSION} from './configuration/constants.js'; +export {generateCodeAndAssess} from './orchestration/generate.js'; +export {groupSimilarReports} from './orchestration/grouping.js'; +export {type LlmRunner} from './codegen/llm-runner.js'; +export {GenkitRunner} from './codegen/genkit/genkit-runner.js'; +export {GeminiCliRunner} from './codegen/gemini-cli/gemini-cli-runner.js'; +export {getRunnerByName, type RunnerName} from './codegen/runner-creation.js'; +export {getEnvironmentByPath} from './configuration/environment-resolution.js'; +export {type Environment} from './configuration/environment.js'; +export {autoRateFiles} from './ratings/autoraters/rate-files.js'; +export {fetchReportsFromDisk} from './reporting/report-local-disk.js'; diff --git a/runner/init-cli.ts b/runner/init-cli.ts index c96ef2e..c5a622c 100644 --- a/runner/init-cli.ts +++ b/runner/init-cli.ts @@ -1,19 +1,18 @@ -import { Argv, CommandModule, Options } from 'yargs'; -import { input, confirm } from '@inquirer/prompts'; +import {Argv, CommandModule, Options} from 'yargs'; +import {input, confirm} from '@inquirer/prompts'; import chalk from 'chalk'; -import { join, relative, dirname } from 'path'; -import { cp } from 'fs/promises'; -import { formatTitleCard } from './reporting/format.js'; -import { generateId } from './utils/id-generation.js'; -import { safeWriteFile, toProcessAbsolutePath } from './file-system-utils.js'; -import { MODEL_PROVIDERS } from './codegen/genkit/models.js'; +import {join, relative, dirname} from 'path'; +import {cp} from 'fs/promises'; +import {formatTitleCard} from './reporting/format.js'; +import {generateId} from './utils/id-generation.js'; +import {safeWriteFile, toProcessAbsolutePath} from './file-system-utils.js'; +import {MODEL_PROVIDERS} from './codegen/genkit/models.js'; export const InitModule = { builder, handler, command: 'init', - describe: - 'Interactive guide through the process of creating an eval environment', + describe: 'Interactive guide through the process of creating an eval environment', } satisfies CommandModule<{}, Options>; interface InitOptions { @@ -50,20 +49,20 @@ async function getAnswers(): Promise { [ 'Welcome LLM enthusiast! 🎉', 'Answer the following questions to create an eval environment', - ].join('\n') - ) + ].join('\n'), + ), ); // Add some spaces at the end to align to the text of the line above. const newLineSeparator = '\n '; - const apiKeyVariables = MODEL_PROVIDERS.map((p) => p.apiKeyVariableName); + const apiKeyVariables = MODEL_PROVIDERS.map(p => p.apiKeyVariableName); - if (!apiKeyVariables.some((name) => process.env[name])) { + if (!apiKeyVariables.some(name => process.env[name])) { const hasConfirmed = await confirm({ message: chalk.red( `Could not detect an API key in any of the following environment variables: ${apiKeyVariables.join(', ')}` + newLineSeparator + - 'You may not be able to run the evals. Do you want to continue generating an environment anyway?' + 'You may not be able to run the evals. Do you want to continue generating an environment anyway?', ), }); @@ -81,10 +80,8 @@ async function getAnswers(): Promise { message: 'Where should we place the environment config file?', required: true, default: join(generateId(displayName) || 'env', 'config.mjs'), - validate: (value) => - value.endsWith('.js') || value.endsWith('.mjs') - ? true - : 'Config must be a .mjs or .js file', + validate: value => + value.endsWith('.js') || value.endsWith('.mjs') ? true : 'Config must be a .mjs or .js file', }); const clientSideFramework = await input({ message: 'What client-side framework will it be using?', @@ -131,20 +128,17 @@ async function writeConfig(options: InitOptions) { if (options.generationSystemPrompt) { generationPromptPath = relative( configDir, - toProcessAbsolutePath(options.generationSystemPrompt) + toProcessAbsolutePath(options.generationSystemPrompt), ); } else { generationPromptPath = './example-system-instructions.md'; - await safeWriteFile( - join(configDir, generationPromptPath), - getExampleSystemInstructions() - ); + await safeWriteFile(join(configDir, generationPromptPath), getExampleSystemInstructions()); } if (options.executablePrompts) { executablePromptsPattern = relative( configDir, - toProcessAbsolutePath(options.executablePrompts) + toProcessAbsolutePath(options.executablePrompts), ); } else { const executablePromptDir = './example-prompts'; @@ -153,7 +147,7 @@ async function writeConfig(options: InitOptions) { await cp( join(import.meta.dirname, '../examples/prompts'), join(configDir, executablePromptDir), - { recursive: true } + {recursive: true}, ); } @@ -196,7 +190,7 @@ async function writeConfig(options: InitOptions) { ` // you can specify a different full-stack framework here.`, ` // fullStackFramework: '',`, `};`, - ].join('\n') + ].join('\n'), ); console.log( @@ -204,8 +198,8 @@ async function writeConfig(options: InitOptions) { [ 'Done! 🎉 You can run your eval with the following command:', `web-codegen-scorer eval --env=${options.configPath}`, - ].join('\n') - ) + ].join('\n'), + ), ); } diff --git a/runner/orchestration/build-repair.ts b/runner/orchestration/build-repair.ts index 6a53fe7..b275fa1 100644 --- a/runner/orchestration/build-repair.ts +++ b/runner/orchestration/build-repair.ts @@ -6,12 +6,12 @@ import { LlmResponseFile, RootPromptDefinition, } from '../shared-interfaces.js'; -import { Environment } from '../configuration/environment.js'; -import { repairCodeWithAI } from './codegen.js'; -import { writeResponseFiles } from './file-system.js'; -import { runBuild } from './build-worker.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; -import { EvalID, Gateway } from './gateway.js'; +import {Environment} from '../configuration/environment.js'; +import {repairCodeWithAI} from './codegen.js'; +import {writeResponseFiles} from './file-system.js'; +import {runBuild} from './build-worker.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; +import {EvalID, Gateway} from './gateway.js'; /** * Calls the LLM to repair code, handles the response, and attempts to build the project again. @@ -45,7 +45,7 @@ export async function repairAndBuild( abortSignal: AbortSignal, workerConcurrencyQueue: PQueue, attempts: number, - progress: ProgressLogger + progress: ProgressLogger, ): Promise { const repairResponse = await repairCodeWithAI( evalID, @@ -59,7 +59,7 @@ export async function repairAndBuild( errorContext, contextFiles, abortSignal, - progress + progress, ); return await handleRepairResponse( @@ -73,7 +73,7 @@ export async function repairAndBuild( workerConcurrencyQueue, abortSignal, attempts, - progress + progress, ); } @@ -92,24 +92,22 @@ async function handleRepairResponse( workerConcurrencyQueue: PQueue, abortSignal: AbortSignal, attempts: number, - progress: ProgressLogger + progress: ProgressLogger, ) { if (!repairResponse.success) { progress.log( rootPromptDef, 'error', - `AI failed to generate a response for repair attempt #${attempts + 1}` + `AI failed to generate a response for repair attempt #${attempts + 1}`, ); // Stop trying to repair if AI can't suggest a fix (API request fails) - throw new Error( - `Repair request failed: ${repairResponse.errors.join('\n')}` - ); + throw new Error(`Repair request failed: ${repairResponse.errors.join('\n')}`); } // Clone the previous files because `mergeRepairFiles` mutates the attempt files. // We don't want to change files of a previous attempt. - const newAttemptFiles = previousAttemptFiles.map((f) => ({ ...f })); + const newAttemptFiles = previousAttemptFiles.map(f => ({...f})); mergeRepairFiles(repairResponse.outputFiles, newAttemptFiles); writeResponseFiles(directory, newAttemptFiles, env, rootPromptDef.name); @@ -122,7 +120,7 @@ async function handleRepairResponse( rootPromptDef, abortSignal, workerConcurrencyQueue, - progress + progress, ); return { @@ -141,15 +139,12 @@ async function handleRepairResponse( * @param repairOutputFiles The array of new or updated files to merge. * @param finalFiles The array of files to be updated. */ -function mergeRepairFiles( - repairOutputFiles: LlmResponseFile[], - finalFiles: LlmResponseFile[] -) { +function mergeRepairFiles(repairOutputFiles: LlmResponseFile[], finalFiles: LlmResponseFile[]) { // Merge the repair response into the original files. Otherwise we may end up dropping // files that were valid in the initial response and the LLM decided not to touch, because // they're still valid. for (const file of repairOutputFiles) { - const existingFile = finalFiles.find((f) => f.filePath === file.filePath); + const existingFile = finalFiles.find(f => f.filePath === file.filePath); if (existingFile) { existingFile.code = file.code; diff --git a/runner/orchestration/build-serve-loop.ts b/runner/orchestration/build-serve-loop.ts index 03a6220..4e8e897 100644 --- a/runner/orchestration/build-serve-loop.ts +++ b/runner/orchestration/build-serve-loop.ts @@ -1,19 +1,15 @@ import PQueue from 'p-queue'; -import { LlmGenerateFilesResponse } from '../codegen/llm-runner.js'; -import { BuildResultStatus } from '../workers/builder/builder-types.js'; -import { Environment } from '../configuration/environment.js'; -import { - AttemptDetails, - LlmContextFile, - RootPromptDefinition, -} from '../shared-interfaces.js'; -import { DEFAULT_MAX_REPAIR_ATTEMPTS } from '../configuration/constants.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; -import { runBuild } from './build-worker.js'; -import { repairAndBuild } from './build-repair.js'; -import { EvalID, Gateway } from './gateway.js'; -import { serveAndTestApp } from './serve-testing-worker.js'; -import { BrowserAgentTaskInput } from '../testing/browser-agent/models.js'; +import {LlmGenerateFilesResponse} from '../codegen/llm-runner.js'; +import {BuildResultStatus} from '../workers/builder/builder-types.js'; +import {Environment} from '../configuration/environment.js'; +import {AttemptDetails, LlmContextFile, RootPromptDefinition} from '../shared-interfaces.js'; +import {DEFAULT_MAX_REPAIR_ATTEMPTS} from '../configuration/constants.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; +import {runBuild} from './build-worker.js'; +import {repairAndBuild} from './build-repair.js'; +import {EvalID, Gateway} from './gateway.js'; +import {serveAndTestApp} from './serve-testing-worker.js'; +import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js'; /** * Attempts to build the code that an LLM generated. If the build fails, attempts @@ -51,7 +47,7 @@ export async function attemptBuild( skipAxeTesting: boolean, enableAutoCsp: boolean, userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined, - maxAxeRepairAttempts: number + maxAxeRepairAttempts: number, ) { const initialBuildResult = await runBuild( evalID, @@ -61,7 +57,7 @@ export async function attemptBuild( rootPromptDef, abortSignal, workerConcurrencyQueue, - progress + progress, ); let repairAttempts = 0; const maxRepairAttempts = gateway.shouldRetryFailedBuilds(evalID) @@ -71,7 +67,7 @@ export async function attemptBuild( const initialAttempt = { outputFiles: initialResponse.files, usage: { - ...{ inputTokens: 0, outputTokens: 0, totalTokens: 0 }, + ...{inputTokens: 0, outputTokens: 0, totalTokens: 0}, ...initialResponse.usage, }, reasoning: initialResponse.reasoning, @@ -90,7 +86,7 @@ export async function attemptBuild( progress.log( rootPromptDef, 'build', - `Trying to repair app build (attempt #${repairAttempts + 1})` + `Trying to repair app build (attempt #${repairAttempts + 1})`, ); const attempt = await repairAndBuild( @@ -107,7 +103,7 @@ export async function attemptBuild( abortSignal, workerConcurrencyQueue, repairAttempts, - progress + progress, ); attemptDetails.push(attempt); @@ -129,7 +125,7 @@ export async function attemptBuild( skipScreenshots, skipAxeTesting, enableAutoCsp, - userJourneyAgentTaskInput + userJourneyAgentTaskInput, ); } @@ -146,13 +142,13 @@ export async function attemptBuild( progress.log( rootPromptDef, 'build', - `Trying to repair axe accessibility violations (attempt #${axeRepairAttempts + 1})...` + `Trying to repair axe accessibility violations (attempt #${axeRepairAttempts + 1})...`, ); const axeViolationsError = JSON.stringify( lastAttempt.serveTestingResult.axeViolations, null, - 2 + 2, ); progress.log(rootPromptDef, 'error', 'Found Axe accessibility violations'); @@ -171,7 +167,7 @@ export async function attemptBuild( abortSignal, workerConcurrencyQueue, axeRepairAttempts + repairAttempts, - progress + progress, ); attemptDetails.push(attempt); @@ -198,15 +194,11 @@ export async function attemptBuild( skipScreenshots, skipAxeTesting, enableAutoCsp, - userJourneyAgentTaskInput + userJourneyAgentTaskInput, ); if (attempt.serveTestingResult.axeViolations?.length === 0) { - progress.log( - rootPromptDef, - 'success', - `Successfully fixed all Axe accessibility violations` - ); + progress.log(rootPromptDef, 'success', `Successfully fixed all Axe accessibility violations`); } } diff --git a/runner/orchestration/build-worker.ts b/runner/orchestration/build-worker.ts index 927f7ba..0c5f5d9 100644 --- a/runner/orchestration/build-worker.ts +++ b/runner/orchestration/build-worker.ts @@ -1,11 +1,8 @@ -import { - BuildResult, - BuildResultStatus, -} from '../workers/builder/builder-types.js'; -import { Environment } from '../configuration/environment.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; -import { RootPromptDefinition } from '../shared-interfaces.js'; -import { EvalID, Gateway } from './gateway.js'; +import {BuildResult, BuildResultStatus} from '../workers/builder/builder-types.js'; +import {Environment} from '../configuration/environment.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; +import {RootPromptDefinition} from '../shared-interfaces.js'; +import {EvalID, Gateway} from './gateway.js'; import PQueue from 'p-queue'; /** Attempts to build the code. */ @@ -17,7 +14,7 @@ export async function runBuild( rootPromptDef: RootPromptDefinition, abortSignal: AbortSignal, workerConcurrencyQueue: PQueue, - progress: ProgressLogger + progress: ProgressLogger, ): Promise { progress.log(rootPromptDef, 'build', `Building the app`); @@ -29,7 +26,7 @@ export async function runBuild( rootPromptDef, workerConcurrencyQueue, abortSignal, - progress + progress, ); if (result.status === BuildResultStatus.SUCCESS) { progress.log(rootPromptDef, 'success', 'Build is successful'); @@ -38,12 +35,7 @@ export async function runBuild( } return result; } catch (err) { - progress.log( - rootPromptDef, - 'error', - `Error during build process`, - err + '' - ); + progress.log(rootPromptDef, 'error', `Error during build process`, err + ''); throw err; } } diff --git a/runner/orchestration/codegen.ts b/runner/orchestration/codegen.ts index 312176d..0ff1097 100644 --- a/runner/orchestration/codegen.ts +++ b/runner/orchestration/codegen.ts @@ -6,16 +6,12 @@ import { ToolLogEntry, Usage, } from '../shared-interfaces.js'; -import { - LlmGenerateFilesContext, - LlmRunner, - PromptDataMessage, -} from '../codegen/llm-runner.js'; -import { Environment } from '../configuration/environment.js'; -import { getPossiblePackageManagers } from '../configuration/environment-config.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; -import { EvalID, Gateway } from './gateway.js'; -import { LocalEnvironment } from '../configuration/environment-local.js'; +import {LlmGenerateFilesContext, LlmRunner, PromptDataMessage} from '../codegen/llm-runner.js'; +import {Environment} from '../configuration/environment.js'; +import {getPossiblePackageManagers} from '../configuration/environment-config.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; +import {EvalID, Gateway} from './gateway.js'; +import {LocalEnvironment} from '../configuration/environment-local.js'; /** * Generates code using the configured AI model based on the provided prompt. @@ -25,7 +21,7 @@ export async function generateCodeWithAI( model: string, codegenContext: LlmGenerateFilesContext, contextFiles: LlmContextFile[], - abortSignal: AbortSignal + abortSignal: AbortSignal, ): Promise { const outputFiles: LlmResponseFile[] = []; const filesToIndexes = new Map(); @@ -36,9 +32,7 @@ export async function generateCodeWithAI( let toolLogs: ToolLogEntry[]; const contextMessageData = prepareContextFilesMessage(contextFiles); - const messages: PromptDataMessage[] | undefined = contextMessageData - ? [contextMessageData] - : []; + const messages: PromptDataMessage[] | undefined = contextMessageData ? [contextMessageData] : []; try { const response = await llm.generateFiles({ @@ -72,7 +66,7 @@ export async function generateCodeWithAI( success = true; } catch (error) { - usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; + usage = {inputTokens: 0, outputTokens: 0, totalTokens: 0}; success = false; reasoning = ''; toolLogs = []; @@ -104,7 +98,7 @@ export async function repairCodeWithAI( errorContext: string, contextFiles: LlmContextFile[], abortSignal: AbortSignal, - progress: ProgressLogger + progress: ProgressLogger, ): Promise { const repairSystemInstructions = env.systemPromptRepair(); const repairPrompt = [ @@ -114,9 +108,7 @@ export async function repairCodeWithAI( '```', '', 'In the following source code:', - ...appFiles.map( - (file) => `${file.filePath}:\n\`\`\`\n${file.code}\`\`\`\n\n` - ), + ...appFiles.map(file => `${file.filePath}:\n\`\`\`\n${file.code}\`\`\`\n\n`), ].join('\n'); const context: LlmGenerateFilesContext = { @@ -124,10 +116,8 @@ export async function repairCodeWithAI( systemInstructions: repairSystemInstructions, executablePrompt: repairPrompt, combinedPrompt: `${repairSystemInstructions}\n${repairPrompt}`, - packageManager: - env instanceof LocalEnvironment ? env.packageManager : undefined, - buildCommand: - env instanceof LocalEnvironment ? env.buildCommand : undefined, + packageManager: env instanceof LocalEnvironment ? env.packageManager : undefined, + buildCommand: env instanceof LocalEnvironment ? env.buildCommand : undefined, possiblePackageManagers: getPossiblePackageManagers().slice(), }; @@ -140,7 +130,7 @@ export async function repairCodeWithAI( errorMessage, appFiles, contextFiles, - abortSignal + abortSignal, ); if (response.success) { @@ -148,22 +138,17 @@ export async function repairCodeWithAI( promptDef, 'codegen', 'Received AI repair response', - createLlmResponseTokenUsageMessage(response) ?? '' + createLlmResponseTokenUsageMessage(response) ?? '', ); } else { - progress.log( - promptDef, - 'error', - 'Failed to repair code with AI', - response.errors.join(', ') - ); + progress.log(promptDef, 'error', 'Failed to repair code with AI', response.errors.join(', ')); } return response; } export function prepareContextFilesMessage( - contextFiles: LlmContextFile[] + contextFiles: LlmContextFile[], ): PromptDataMessage | null { if (contextFiles.length === 0) { return null; @@ -177,16 +162,12 @@ export function prepareContextFilesMessage( return { role: 'user', - content: [{ text: contextMessage }], + content: [{text: contextMessage}], }; } -export function createLlmResponseTokenUsageMessage( - response: LlmResponse -): string | null { - return response.usage.inputTokens || - response.usage.outputTokens || - response.usage.totalTokens +export function createLlmResponseTokenUsageMessage(response: LlmResponse): string | null { + return response.usage.inputTokens || response.usage.outputTokens || response.usage.totalTokens ? `(input tokens: ${response.usage.inputTokens}, output tokens: ${response.usage.outputTokens}, total tokens: ${response.usage.totalTokens})` : null; } diff --git a/runner/orchestration/file-system.ts b/runner/orchestration/file-system.ts index 3ab0deb..e9e5530 100644 --- a/runner/orchestration/file-system.ts +++ b/runner/orchestration/file-system.ts @@ -1,25 +1,21 @@ -import { tmpdir } from 'os'; -import { LLM_OUTPUT_DIR } from '../configuration/constants.js'; -import { Environment } from '../configuration/environment.js'; +import {tmpdir} from 'os'; +import {LLM_OUTPUT_DIR} from '../configuration/constants.js'; +import {Environment} from '../configuration/environment.js'; import { copyFolderExcept, createSymlinkIfNotExists, removeFolderWithSymlinks, safeWriteFile, } from '../file-system-utils.js'; -import { - LlmContextFile, - LlmResponseFile, - RootPromptDefinition, -} from '../shared-interfaces.js'; -import { join } from 'path'; -import { existsSync } from 'fs'; -import { mkdir, mkdtemp, readFile } from 'fs/promises'; -import { globSync } from 'tinyglobby'; -import { executeCommand } from '../utils/exec.js'; -import { UserFacingError } from '../utils/errors.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; -import { LocalEnvironment } from '../configuration/environment-local.js'; +import {LlmContextFile, LlmResponseFile, RootPromptDefinition} from '../shared-interfaces.js'; +import {join} from 'path'; +import {existsSync} from 'fs'; +import {mkdir, mkdtemp, readFile} from 'fs/promises'; +import {globSync} from 'tinyglobby'; +import {executeCommand} from '../utils/exec.js'; +import {UserFacingError} from '../utils/errors.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; +import {LocalEnvironment} from '../configuration/environment-local.js'; const SYMLINK_PROJECT_PATHS = new Set(['node_modules']); const PENDING_INSTALLS = new Map>(); @@ -36,7 +32,7 @@ export async function setupProjectStructure( env: Environment, rootPromptDef: RootPromptDefinition, progress: ProgressLogger, - outputDirectory?: string + outputDirectory?: string, ) { let directory: string; let cleanup: () => Promise; @@ -46,15 +42,13 @@ export async function setupProjectStructure( // is specified since the main use case is debugging. directory = join(outputDirectory, env.id, rootPromptDef.name); - await mkdir(directory, { recursive: true }); + await mkdir(directory, {recursive: true}); // Don't clean up the custom output directory so it can be inspected. cleanup = () => Promise.resolve(); } else { // When outputting to the temporary directory, make sure that the directory is unique. - directory = await mkdtemp( - join(tmpdir(), `fw-${env.id}-build-${rootPromptDef.name}`) - ); + directory = await mkdtemp(join(tmpdir(), `fw-${env.id}-build-${rootPromptDef.name}`)); cleanup = async () => { try { @@ -73,12 +67,7 @@ export async function setupProjectStructure( // evals can reuse the same dependencies. It also allows pnpm workspaces to work // properly since we might not have copied the `pnpm-workspaces.yml`. if (!env.isBuiltIn) { - await installDependenciesInDirectory( - env, - rootPromptDef, - env.projectTemplatePath, - progress - ); + await installDependenciesInDirectory(env, rootPromptDef, env.projectTemplatePath, progress); } } @@ -89,12 +78,7 @@ export async function setupProjectStructure( // Also try to install dependencies in the source directory, // because it may be overriding the ones from the template. if (!env.isBuiltIn) { - await installDependenciesInDirectory( - env, - rootPromptDef, - env.sourceDirectory, - progress - ); + await installDependenciesInDirectory(env, rootPromptDef, env.sourceDirectory, progress); } } @@ -103,10 +87,7 @@ export async function setupProjectStructure( if (!env.isBuiltIn) { for (const symlinkPath of SYMLINK_PROJECT_PATHS) { - await createSymlinkIfNotExists( - join(dirToCopy, symlinkPath), - join(directory, symlinkPath) - ); + await createSymlinkIfNotExists(join(dirToCopy, symlinkPath), join(directory, symlinkPath)); } } } @@ -115,15 +96,10 @@ export async function setupProjectStructure( // Since running an installation inside `node_modules` can be problematic, we install // in the temporary directory instead. This can be slower, but is more reliable. if (env instanceof LocalEnvironment && env.isBuiltIn) { - await installDependenciesInDirectory( - env, - rootPromptDef, - directory, - progress - ); + await installDependenciesInDirectory(env, rootPromptDef, directory, progress); } - return { directory, cleanup }; + return {directory, cleanup}; } /** Run the package manager install command in a specific directory. */ @@ -131,7 +107,7 @@ function installDependenciesInDirectory( env: LocalEnvironment, rootPromptDef: RootPromptDefinition, directory: string, - progress: ProgressLogger + progress: ProgressLogger, ): Promise { // The install script will error out if there's no `package.json`. if (env.skipInstall || !existsSync(join(directory, 'package.json'))) { @@ -155,9 +131,7 @@ function installDependenciesInDirectory( return undefined; }) .catch(() => { - throw new UserFacingError( - `Failed to install dependencies in ${directory}` - ); + throw new UserFacingError(`Failed to install dependencies in ${directory}`); }) .finally(() => { PENDING_INSTALLS.delete(key); @@ -174,7 +148,7 @@ function installDependenciesInDirectory( */ export async function resolveContextFiles( patterns: string[], - directory: string + directory: string, ): Promise { if (patterns.length === 0) { return Promise.resolve([]); @@ -193,10 +167,10 @@ export async function resolveContextFiles( }); return Promise.all( - paths.map(async (relativePath) => ({ + paths.map(async relativePath => ({ relativePath, content: await readFile(join(directory, relativePath), 'utf8'), - })) + })), ); } @@ -211,10 +185,10 @@ export async function writeResponseFiles( directory: string, files: LlmResponseFile[], env: Environment, - promptName: string + promptName: string, ): Promise { const llmOutputDir = join(LLM_OUTPUT_DIR, env.id, promptName); - const filePromises = files.map(async (file) => { + const filePromises = files.map(async file => { // Write file to a tmp folder first for debugging await safeWriteFile(join(llmOutputDir, file.filePath), file.code); diff --git a/runner/orchestration/gateway.ts b/runner/orchestration/gateway.ts index da631c0..7e2bf01 100644 --- a/runner/orchestration/gateway.ts +++ b/runner/orchestration/gateway.ts @@ -1,16 +1,16 @@ import PQueue from 'p-queue'; -import { LlmGenerateFilesContext } from '../codegen/llm-runner.js'; -import { Environment } from '../configuration/environment.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; +import {LlmGenerateFilesContext} from '../codegen/llm-runner.js'; +import {Environment} from '../configuration/environment.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; import { LlmContextFile, LlmResponse, LlmResponseFile, RootPromptDefinition, } from '../shared-interfaces.js'; -import { BuildResult } from '../workers/builder/builder-types.js'; +import {BuildResult} from '../workers/builder/builder-types.js'; -export type EvalID = string & { __evalID: true }; +export type EvalID = string & {__evalID: true}; export interface Gateway { /** Initializes an eval. */ @@ -22,7 +22,7 @@ export interface Gateway { requestCtx: LlmGenerateFilesContext, model: string, contextFiles: LlmContextFile[], - abortSignal: AbortSignal + abortSignal: AbortSignal, ): Promise; repairBuild( @@ -32,7 +32,7 @@ export interface Gateway { errorMessage: string, appFiles: LlmResponseFile[], contextFiles: LlmContextFile[], - abortSignal: AbortSignal + abortSignal: AbortSignal, ): Promise; shouldRetryFailedBuilds(evalID: EvalID): boolean; @@ -44,7 +44,7 @@ export interface Gateway { rootPromptDef: RootPromptDefinition, workerConcurrencyQueue: PQueue, abortSignal: AbortSignal, - progress: ProgressLogger + progress: ProgressLogger, ): Promise; serveBuild( @@ -53,7 +53,7 @@ export interface Gateway { appDirectoryPath: string, rootPromptDef: RootPromptDefinition, progress: ProgressLogger, - logicWhileServing: (serveUrl: string) => Promise + logicWhileServing: (serveUrl: string) => Promise, ): Promise; finalizeEval(id: EvalID): Promise; diff --git a/runner/orchestration/gateways/local_gateway.ts b/runner/orchestration/gateways/local_gateway.ts index b39a0e3..75cd970 100644 --- a/runner/orchestration/gateways/local_gateway.ts +++ b/runner/orchestration/gateways/local_gateway.ts @@ -1,26 +1,23 @@ -import { ChildProcess, fork } from 'node:child_process'; +import {ChildProcess, fork} from 'node:child_process'; import { BuildResult, BuildWorkerMessage, BuildWorkerResponseMessage, } from '../../workers/builder/builder-types.js'; -import { - LlmGenerateFilesContext, - LlmRunner, -} from '../../codegen/llm-runner.js'; +import {LlmGenerateFilesContext, LlmRunner} from '../../codegen/llm-runner.js'; import { RootPromptDefinition, LlmContextFile, LlmResponse, LlmResponseFile, } from '../../shared-interfaces.js'; -import { generateCodeWithAI } from '../codegen.js'; -import { EvalID, Gateway } from '../gateway.js'; +import {generateCodeWithAI} from '../codegen.js'; +import {EvalID, Gateway} from '../gateway.js'; import path from 'node:path'; -import { killChildProcessGracefully } from '../../utils/kill-gracefully.js'; -import { ProgressLogger } from '../../progress/progress-logger.js'; -import { serveApp } from '../../workers/serve-testing/serve-app.js'; -import { LocalEnvironment } from '../../configuration/environment-local.js'; +import {killChildProcessGracefully} from '../../utils/kill-gracefully.js'; +import {ProgressLogger} from '../../progress/progress-logger.js'; +import {serveApp} from '../../workers/serve-testing/serve-app.js'; +import {LocalEnvironment} from '../../configuration/environment-local.js'; import PQueue from 'p-queue'; let uniqueIDs = 0; @@ -37,15 +34,9 @@ export class LocalGateway implements Gateway { requestCtx: LlmGenerateFilesContext, model: string, contextFiles: LlmContextFile[], - abortSignal: AbortSignal + abortSignal: AbortSignal, ): Promise { - return await generateCodeWithAI( - this.llm, - model, - requestCtx, - contextFiles, - abortSignal - ); + return await generateCodeWithAI(this.llm, model, requestCtx, contextFiles, abortSignal); } async repairBuild( @@ -55,15 +46,9 @@ export class LocalGateway implements Gateway { errorMessage: string, appFiles: LlmResponseFile[], contextFiles: LlmContextFile[], - abortSignal: AbortSignal + abortSignal: AbortSignal, ): Promise { - return await generateCodeWithAI( - this.llm, - model, - requestCtx, - contextFiles, - abortSignal - ); + return await generateCodeWithAI(this.llm, model, requestCtx, contextFiles, abortSignal); } tryBuild( @@ -73,7 +58,7 @@ export class LocalGateway implements Gateway { rootPromptDef: RootPromptDefinition, workerConcurrencyQueue: PQueue, abortSignal: AbortSignal, - progress: ProgressLogger + progress: ProgressLogger, ): Promise { const buildParams: BuildWorkerMessage = { directory: appDirectoryPath, @@ -85,11 +70,8 @@ export class LocalGateway implements Gateway { () => new Promise((resolve, reject) => { const child: ChildProcess = fork( - path.resolve( - import.meta.dirname, - '../../workers/builder/worker.js' - ), - { signal: abortSignal } + path.resolve(import.meta.dirname, '../../workers/builder/worker.js'), + {signal: abortSignal}, ); child.send(buildParams); @@ -97,12 +79,12 @@ export class LocalGateway implements Gateway { await killChildProcessGracefully(child); resolve(result.payload); }); - child.on('error', async (err) => { + child.on('error', async err => { await killChildProcessGracefully(child); reject(err); }); }), - { throwOnTimeout: true } + {throwOnTimeout: true}, ); } @@ -112,14 +94,14 @@ export class LocalGateway implements Gateway { appDirectoryPath: string, rootPromptDef: RootPromptDefinition, progress: ProgressLogger, - logicWhileServing: (serveUrl: string) => Promise + logicWhileServing: (serveUrl: string) => Promise, ): Promise { return await serveApp( env.serveCommand, rootPromptDef, appDirectoryPath, progress, - logicWhileServing + logicWhileServing, ); } diff --git a/runner/orchestration/generate.ts b/runner/orchestration/generate.ts index dd3012f..153dd5f 100644 --- a/runner/orchestration/generate.ts +++ b/runner/orchestration/generate.ts @@ -1,10 +1,10 @@ -import { globSync } from 'tinyglobby'; -import { readFile } from 'fs/promises'; -import { availableParallelism } from 'os'; -import { randomUUID } from 'crypto'; +import {globSync} from 'tinyglobby'; +import {readFile} from 'fs/promises'; +import {availableParallelism} from 'os'; +import {randomUUID} from 'crypto'; import PQueue from 'p-queue'; -import { basename, join } from 'path'; -import { existsSync, readdirSync } from 'fs'; +import {basename, join} from 'path'; +import {existsSync, readdirSync} from 'fs'; import { assertValidModelName, LlmGenerateFilesContext, @@ -15,10 +15,10 @@ import { LLM_OUTPUT_DIR, REPORT_VERSION, } from '../configuration/constants.js'; -import { Environment } from '../configuration/environment.js'; -import { rateGeneratedCode } from '../ratings/rate-code.js'; -import { summarizeReportWithAI } from '../reporting/ai-summarize.js'; -import { redX } from '../reporting/format.js'; +import {Environment} from '../configuration/environment.js'; +import {rateGeneratedCode} from '../ratings/rate-code.js'; +import {summarizeReportWithAI} from '../reporting/ai-summarize.js'; +import {redX} from '../reporting/format.js'; import { AssessmentResult, AttemptDetails, @@ -32,29 +32,25 @@ import { RunSummary, Usage, } from '../shared-interfaces.js'; -import { BrowserAgentTaskInput } from '../testing/browser-agent/models.js'; -import { callWithTimeout } from '../utils/timeout.js'; -import { attemptBuild } from './build-serve-loop.js'; -import { createLlmResponseTokenUsageMessage } from './codegen.js'; -import { generateUserJourneysForApp } from './user-journeys.js'; -import { - resolveContextFiles, - setupProjectStructure, - writeResponseFiles, -} from './file-system.js'; -import { GenkitRunner } from '../codegen/genkit/genkit-runner.js'; -import { getEnvironmentByPath } from '../configuration/environment-resolution.js'; -import { getPossiblePackageManagers } from '../configuration/environment-config.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; -import { TextProgressLogger } from '../progress/text-progress-logger.js'; -import { logReportHeader } from '../reporting/report-logging.js'; -import { DynamicProgressLogger } from '../progress/dynamic-progress-logger.js'; -import { UserFacingError } from '../utils/errors.js'; -import { getRunGroupId } from './grouping.js'; -import { executeCommand } from '../utils/exec.js'; -import { EvalID, Gateway } from './gateway.js'; -import { LocalEnvironment } from '../configuration/environment-local.js'; -import { getRunnerByName, RunnerName } from '../codegen/runner-creation.js'; +import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js'; +import {callWithTimeout} from '../utils/timeout.js'; +import {attemptBuild} from './build-serve-loop.js'; +import {createLlmResponseTokenUsageMessage} from './codegen.js'; +import {generateUserJourneysForApp} from './user-journeys.js'; +import {resolveContextFiles, setupProjectStructure, writeResponseFiles} from './file-system.js'; +import {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; +import {getEnvironmentByPath} from '../configuration/environment-resolution.js'; +import {getPossiblePackageManagers} from '../configuration/environment-config.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; +import {TextProgressLogger} from '../progress/text-progress-logger.js'; +import {logReportHeader} from '../reporting/report-logging.js'; +import {DynamicProgressLogger} from '../progress/dynamic-progress-logger.js'; +import {UserFacingError} from '../utils/errors.js'; +import {getRunGroupId} from './grouping.js'; +import {executeCommand} from '../utils/exec.js'; +import {EvalID, Gateway} from './gateway.js'; +import {LocalEnvironment} from '../configuration/environment-local.js'; +import {getRunnerByName, RunnerName} from '../codegen/runner-creation.js'; /** * Orchestrates the entire assessment process for each prompt defined in the `prompts` array. @@ -90,10 +86,7 @@ export async function generateCodeAndAssess(options: { autoraterModel?: string; a11yRepairAttempts?: number; }): Promise { - const env = await getEnvironmentByPath( - options.environmentConfigPath, - options.runner - ); + const env = await getEnvironmentByPath(options.environmentConfigPath, options.runner); const ratingLlm = await getRunnerByName('genkit'); // TODO(devversion): Consider validating model names also for remote environments. @@ -105,12 +98,10 @@ export async function generateCodeAndAssess(options: { const promptsToProcess = getCandidateExecutablePrompts( env, options.localMode, - options.promptFilter + options.promptFilter, ).slice(0, options.limit); const progress = - options.logging === 'dynamic' - ? new DynamicProgressLogger() - : new TextProgressLogger(); + options.logging === 'dynamic' ? new DynamicProgressLogger() : new TextProgressLogger(); const appConcurrency = options.concurrency === 'auto' ? Math.floor(availableParallelism() * 0.8) @@ -119,9 +110,7 @@ export async function generateCodeAndAssess(options: { if (promptsToProcess.length === 0) { throw new UserFacingError( `No prompts have been configured for environment '${env.displayName}'` + - (options.promptFilter - ? ` and filtered by '${options.promptFilter}'.` - : '.') + (options.promptFilter ? ` and filtered by '${options.promptFilter}'.` : '.'), ); } @@ -142,15 +131,12 @@ export async function generateCodeAndAssess(options: { env.mcpServerOptions.length && env.llm.startMcpServerHost ) { - env.llm.startMcpServerHost( - `mcp-${env.clientSideFramework.id}`, - env.mcpServerOptions - ); + env.llm.startMcpServerHost(`mcp-${env.clientSideFramework.id}`, env.mcpServerOptions); } progress.initialize(promptsToProcess.length); - const appConcurrencyQueue = new PQueue({ concurrency: appConcurrency }); + const appConcurrencyQueue = new PQueue({concurrency: appConcurrency}); const workerConcurrencyQueue = new PQueue({ concurrency: options.concurrency === 'auto' @@ -173,7 +159,7 @@ export async function generateCodeAndAssess(options: { try { return await callWithTimeout( `Evaluation of ${rootPromptDef.name}`, - async (abortSignal) => + async abortSignal => startEvaluationTask( evalID, env, @@ -192,10 +178,10 @@ export async function generateCodeAndAssess(options: { workerConcurrencyQueue, progress, options.autoraterModel || DEFAULT_AUTORATER_MODEL_NAME, - options.a11yRepairAttempts ?? 0 + options.a11yRepairAttempts ?? 0, ), // 10min max per app evaluation. We just want to make sure it never gets stuck. - 10 + 10, ); } catch (e: unknown) { failedPrompts.push({ @@ -209,12 +195,7 @@ export async function generateCodeAndAssess(options: { details += `\nStack: ${e.stack}`; } - progress.log( - rootPromptDef, - 'error', - 'Failed to evaluate code', - details - ); + progress.log(rootPromptDef, 'error', 'Failed to evaluate code', details); return [] satisfies AssessmentResult[]; } finally { progress.log(rootPromptDef, 'done', 'Done'); @@ -222,8 +203,8 @@ export async function generateCodeAndAssess(options: { await env.gateway.finalizeEval(evalID); } }, - { throwOnTimeout: true } - ) + {throwOnTimeout: true}, + ), ); } @@ -242,7 +223,7 @@ export async function generateCodeAndAssess(options: { env.llm.startMcpServerHost && env.llm.flushMcpServerLogs ? { - servers: env.mcpServerOptions.map((m) => ({ + servers: env.mcpServerOptions.map(m => ({ name: m.name, command: m.command, args: m.args, @@ -263,16 +244,12 @@ export async function generateCodeAndAssess(options: { allPromptsCount: promptsToProcess.length, failedPrompts, }, - options + options, ), timestamp: timestamp.toISOString(), reportName: options.reportName, - systemPromptGeneration: env.classifyPrompts - ? 'Classified 🕵️' - : env.systemPromptGeneration(), - systemPromptRepair: env.classifyPrompts - ? 'Classified 🕵️' - : env.systemPromptRepair(), + systemPromptGeneration: env.classifyPrompts ? 'Classified 🕵️' : env.systemPromptGeneration(), + systemPromptRepair: env.classifyPrompts ? 'Classified 🕵️' : env.systemPromptRepair(), // Deduplicate labels before finalizing the report. labels: Array.from(new Set(options.labels)), mcp, @@ -331,19 +308,18 @@ async function startEvaluationTask( workerConcurrencyQueue: PQueue, progress: ProgressLogger, autoraterModel: string, - a11yRepairAttempts: number + a11yRepairAttempts: number, ): Promise { // Set up the project structure once for the root project. - const { directory, cleanup } = await setupProjectStructure( + const {directory, cleanup} = await setupProjectStructure( env, rootPromptDef, progress, - outputDirectory + outputDirectory, ); const results: AssessmentResult[] = []; - const defsToExecute = - rootPromptDef.kind === 'single' ? [rootPromptDef] : rootPromptDef.steps; + const defsToExecute = rootPromptDef.kind === 'single' ? [rootPromptDef] : rootPromptDef.steps; for (const promptDef of defsToExecute) { const [fullPromptText, systemInstructions] = await Promise.all([ @@ -353,10 +329,7 @@ async function startEvaluationTask( // Resolve the context files from the root. We need to do this after the project is set up // and for each sub-prompt, because the project will be augmented on each iteration. - const contextFiles = await resolveContextFiles( - promptDef.contextFilePatterns, - directory - ); + const contextFiles = await resolveContextFiles(promptDef.contextFilePatterns, directory); // Generate the initial set of files through the LLM. const initialResponse = await generateInitialFiles( @@ -370,16 +343,14 @@ async function startEvaluationTask( systemInstructions, combinedPrompt: fullPromptText, executablePrompt: promptDef.prompt, - packageManager: - env instanceof LocalEnvironment ? env.packageManager : undefined, - buildCommand: - env instanceof LocalEnvironment ? env.buildCommand : undefined, + packageManager: env instanceof LocalEnvironment ? env.packageManager : undefined, + buildCommand: env instanceof LocalEnvironment ? env.buildCommand : undefined, possiblePackageManagers: getPossiblePackageManagers().slice(), }, contextFiles, localMode, abortSignal, - progress + progress, ); const toolLogs = initialResponse.toolLogs ?? []; @@ -388,7 +359,7 @@ async function startEvaluationTask( progress.log( promptDef, 'error', - 'Failed to generate initial code using AI. Skipping this app.' + 'Failed to generate initial code using AI. Skipping this app.', ); await cleanup(); break; @@ -399,22 +370,12 @@ async function startEvaluationTask( // Note: This can fail when the LLM e.g. produced a wrong file name that is too large, // and results in a file system error. Gracefully handle this so we can continue testing. // Write the generated files to disk within the project directory. - await writeResponseFiles( - directory, - initialResponse.files, - env, - rootPromptDef.name - ); + await writeResponseFiles(directory, initialResponse.files, env, rootPromptDef.name); // If we're in a multi-step prompt, also write out to dedicated directories // for each sub-prompt so that we can inspect the output along the way. if (rootPromptDef.kind === 'multi-step') { - await writeResponseFiles( - directory, - initialResponse.files, - env, - promptDef.name - ); + await writeResponseFiles(directory, initialResponse.files, env, promptDef.name); } } catch (e) { let details = `Error: ${e}`; @@ -427,7 +388,7 @@ async function startEvaluationTask( promptDef, 'error', 'Failed to generate initial code using AI. Skipping this app.', - details + details, ); await cleanup(); @@ -439,18 +400,17 @@ async function startEvaluationTask( rootPromptDef.name, defsToExecute[0].prompt, initialResponse.files, - abortSignal + abortSignal, ); // TODO: Only execute the serve command on the "final working attempt". // TODO: Incorporate usage. - const userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined = - enableUserJourneyTesting - ? { - userJourneys: userJourneys.result, - appPrompt: defsToExecute[0].prompt, - } - : undefined; + const userJourneyAgentTaskInput: BrowserAgentTaskInput | undefined = enableUserJourneyTesting + ? { + userJourneys: userJourneys.result, + appPrompt: defsToExecute[0].prompt, + } + : undefined; const attemptDetails: AttemptDetails[] = []; // Store details for assessment.json @@ -473,7 +433,7 @@ async function startEvaluationTask( skipAxeTesting, enableAutoCsp, userJourneyAgentTaskInput, - a11yRepairAttempts + a11yRepairAttempts, ); if (!attempt) { @@ -493,7 +453,7 @@ async function startEvaluationTask( attempt.axeRepairAttempts, abortSignal, progress, - autoraterModel + autoraterModel, ); results.push({ @@ -540,24 +500,22 @@ async function generateInitialFiles( contextFiles: LlmContextFile[], localMode: boolean, abortSignal: AbortSignal, - progress: ProgressLogger + progress: ProgressLogger, ): Promise { if (localMode) { const localFilesDirectory = join(LLM_OUTPUT_DIR, env.id, promptDef.name); - const filePaths = globSync('**/*', { cwd: localFilesDirectory }); + const filePaths = globSync('**/*', {cwd: localFilesDirectory}); if (filePaths.length === 0) { - throw new UserFacingError( - `Could not find pre-existing files in ${localFilesDirectory}` - ); + throw new UserFacingError(`Could not find pre-existing files in ${localFilesDirectory}`); } return { files: await Promise.all( - filePaths.map(async (filePath) => ({ + filePaths.map(async filePath => ({ filePath, code: await readFile(join(localFilesDirectory, filePath), 'utf8'), - })) + })), ), usage: { inputTokens: 0, @@ -576,7 +534,7 @@ async function generateInitialFiles( codegenContext, model, contextFiles, - abortSignal + abortSignal, ); if (response.success) { @@ -584,21 +542,14 @@ async function generateInitialFiles( promptDef, 'codegen', 'Received AI code generation response', - createLlmResponseTokenUsageMessage(response) ?? '' + createLlmResponseTokenUsageMessage(response) ?? '', ); } else { - progress.log( - promptDef, - 'error', - 'Failed to generate code with AI', - response.errors.join(', ') - ); + progress.log(promptDef, 'error', 'Failed to generate code with AI', response.errors.join(', ')); } if (!response.success) { - throw new Error( - `Initial file generation failed: ${response.errors.join('\n')}` - ); + throw new Error(`Initial file generation failed: ${response.errors.join('\n')}`); } return { @@ -620,13 +571,13 @@ async function prepareSummary( env: Environment, assessments: AssessmentResult[], completionStats: CompletionStats, - opts: { skipAiSummary?: boolean } + opts: {skipAiSummary?: boolean}, ): Promise { let inputTokens = 0; let outputTokens = 0; let totalTokens = 0; - assessments.forEach((result) => { + assessments.forEach(result => { // Incorporate usage from running raters. if (result.score.tokenUsage) { inputTokens += result.score.tokenUsage.inputTokens; @@ -635,7 +586,7 @@ async function prepareSummary( } // Incorporate usage numbers from all generate + build attempts. - result.attemptDetails.forEach((attempt) => { + result.attemptDetails.forEach(attempt => { if (attempt.usage) { inputTokens += attempt.usage.inputTokens ?? 0; outputTokens += attempt.usage.outputTokens ?? 0; @@ -647,11 +598,7 @@ async function prepareSummary( let aiSummary: string | undefined = undefined; if (!opts.skipAiSummary) { try { - const result = await summarizeReportWithAI( - genkit, - abortSignal, - assessments - ); + const result = await summarizeReportWithAI(genkit, abortSignal, assessments); inputTokens += result.usage.inputTokens; outputTokens += result.usage.outputTokens; totalTokens += result.usage.totalTokens; @@ -687,8 +634,7 @@ async function prepareSummary( }, runner: { id: env instanceof LocalEnvironment ? env.llm.id : 'remote', - displayName: - env instanceof LocalEnvironment ? env.llm.displayName : 'Remote', + displayName: env instanceof LocalEnvironment ? env.llm.displayName : 'Remote', }, } satisfies RunSummary; } @@ -697,7 +643,7 @@ async function prepareSummary( function getCandidateExecutablePrompts( env: Environment, localMode: boolean, - promptFilter: string | undefined + promptFilter: string | undefined, ): RootPromptDefinition[] { const envDir = join(LLM_OUTPUT_DIR, env.id); let result = env.executablePrompts; @@ -708,10 +654,10 @@ function getCandidateExecutablePrompts( const localPromptNames = readdirSync(envDir, { withFileTypes: true, }) - .filter((entry) => entry.isDirectory()) - .map((entry) => basename(entry.name)); + .filter(entry => entry.isDirectory()) + .map(entry => basename(entry.name)); - result = result.filter(({ name }) => localPromptNames.includes(name)); + result = result.filter(({name}) => localPromptNames.includes(name)); } // If there's no prompt filter, shuffle the array to introduce some randomness. @@ -721,7 +667,7 @@ function getCandidateExecutablePrompts( // Otherwise only filter by name, but don't shuffle since // the user appears to be targeting a specific prompt. - return result.filter(({ name }) => name.includes(promptFilter)); + return result.filter(({name}) => name.includes(promptFilter)); } let chromeInstallPromise: Promise | null = null; @@ -734,7 +680,7 @@ async function installChrome(): Promise { chromeInstallPromise = executeCommand( 'npx puppeteer browsers install chrome', // The command needs to run in a directory whose closest node_modules contain `puppeteer`. - import.meta.dirname + import.meta.dirname, ); } diff --git a/runner/orchestration/grouping.ts b/runner/orchestration/grouping.ts index c43d97f..9eb19aa 100644 --- a/runner/orchestration/grouping.ts +++ b/runner/orchestration/grouping.ts @@ -1,13 +1,9 @@ -import { createHash } from 'crypto'; -import type { LlmRunner } from '../codegen/llm-runner.js'; -import type { Environment } from '../configuration/environment.js'; -import { calculateBuildAndCheckStats } from '../ratings/stats.js'; -import type { - AssessmentResult, - RunGroup, - RunInfo, -} from '../shared-interfaces.js'; -import { RunnerName } from '../codegen/runner-creation.js'; +import {createHash} from 'crypto'; +import type {LlmRunner} from '../codegen/llm-runner.js'; +import type {Environment} from '../configuration/environment.js'; +import {calculateBuildAndCheckStats} from '../ratings/stats.js'; +import type {AssessmentResult, RunGroup, RunInfo} from '../shared-interfaces.js'; +import {RunnerName} from '../codegen/runner-creation.js'; /** Generates a unique grouping ID for a run. */ export function getRunGroupId( @@ -17,7 +13,7 @@ export function getRunGroupId( runner: RunnerName; model: string; labels?: string[]; - } + }, ): string { const dateOnly = new Date( timestamp.getFullYear(), @@ -25,7 +21,7 @@ export function getRunGroupId( timestamp.getDate(), 0, 0, - 0 + 0, ); // We use this as a key to group identical reports together. @@ -69,7 +65,7 @@ export function groupSimilarReports(inputRuns: RunInfo[]): RunGroup[] { let totalForRun = 0; let maxForRun = 0; - run.details.labels?.forEach((label) => labels.add(label)); + run.details.labels?.forEach(label => labels.add(label)); for (const result of run.results) { totalForRun += result.score.totalPoints; diff --git a/runner/orchestration/serve-testing-worker.ts b/runner/orchestration/serve-testing-worker.ts index ca5b369..5406b13 100644 --- a/runner/orchestration/serve-testing-worker.ts +++ b/runner/orchestration/serve-testing-worker.ts @@ -1,16 +1,16 @@ -import { ChildProcess, fork } from 'node:child_process'; +import {ChildProcess, fork} from 'node:child_process'; import path from 'node:path'; -import { Environment } from '../configuration/environment.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; -import { RootPromptDefinition } from '../shared-interfaces.js'; -import { killChildProcessGracefully } from '../utils/kill-gracefully.js'; +import {Environment} from '../configuration/environment.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; +import {RootPromptDefinition} from '../shared-interfaces.js'; +import {killChildProcessGracefully} from '../utils/kill-gracefully.js'; import { ServeTestingResult, ServeTestingWorkerMessage, ServeTestingWorkerResponseMessage, } from '../workers/serve-testing/worker-types.js'; -import { EvalID, Gateway } from './gateway.js'; -import { BrowserAgentTaskInput } from '../testing/browser-agent/models.js'; +import {EvalID, Gateway} from './gateway.js'; +import {BrowserAgentTaskInput} from '../testing/browser-agent/models.js'; import PQueue from 'p-queue'; /** Attempts to run & test an eval app. */ @@ -26,7 +26,7 @@ export async function serveAndTestApp( skipScreenshots: boolean, skipAxeTesting: boolean, enableAutoCsp: boolean, - userJourneyAgentTaskInput?: BrowserAgentTaskInput + userJourneyAgentTaskInput?: BrowserAgentTaskInput, ): Promise { progress.log(rootPromptDef, 'serve-testing', `Testing the app`); @@ -36,7 +36,7 @@ export async function serveAndTestApp( appDirectoryPath, rootPromptDef, progress, - async (serveUrl) => { + async serveUrl => { const serveParams: ServeTestingWorkerMessage = { serveUrl, appName: rootPromptDef.name, @@ -50,49 +50,38 @@ export async function serveAndTestApp( () => new Promise((resolve, reject) => { const child: ChildProcess = fork( - path.resolve( - import.meta.dirname, - '../workers/serve-testing/worker.js' - ), - { signal: abortSignal } + path.resolve(import.meta.dirname, '../workers/serve-testing/worker.js'), + {signal: abortSignal}, ); child.send(serveParams); - child.on( - 'message', - async (result: ServeTestingWorkerResponseMessage) => { - if (result.type === 'result') { - await killChildProcessGracefully(child); - resolve(result.payload); - } else { - progress.log( - rootPromptDef, - result.payload.state, - result.payload.message, - result.payload.details - ); - } + child.on('message', async (result: ServeTestingWorkerResponseMessage) => { + if (result.type === 'result') { + await killChildProcessGracefully(child); + resolve(result.payload); + } else { + progress.log( + rootPromptDef, + result.payload.state, + result.payload.message, + result.payload.details, + ); } - ); - child.on('error', async (err) => { + }); + child.on('error', async err => { await killChildProcessGracefully(child); reject(err); }); }), - { throwOnTimeout: true } + {throwOnTimeout: true}, ); - } + }, ); if (result.errorMessage === undefined) { progress.log(rootPromptDef, 'success', 'Testing is successful'); } else { - progress.log( - rootPromptDef, - 'error', - 'Testing has failed', - result.errorMessage - ); + progress.log(rootPromptDef, 'error', 'Testing has failed', result.errorMessage); } return result; diff --git a/runner/orchestration/user-journeys.ts b/runner/orchestration/user-journeys.ts index 0238c70..ef6165f 100644 --- a/runner/orchestration/user-journeys.ts +++ b/runner/orchestration/user-journeys.ts @@ -1,7 +1,7 @@ -import { z } from 'zod'; -import { LlmResponseFile, Usage } from '../shared-interfaces.js'; -import { GenkitRunner } from '../codegen/genkit/genkit-runner.js'; -import { UserFacingError } from '../utils/errors.js'; +import {z} from 'zod'; +import {LlmResponseFile, Usage} from '../shared-interfaces.js'; +import {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; +import {UserFacingError} from '../utils/errors.js'; // NOTE: When changing this, also change `browser-agent`'s prompt! const USER_JOURNEY_SCHEMA = z.object({ @@ -29,7 +29,7 @@ export async function generateUserJourneysForApp( appName: string, appPrompt: string, appFiles: LlmResponseFile[], - abortSignal: AbortSignal + abortSignal: AbortSignal, ): Promise { const result = await llm.generateConstrained({ prompt: ` @@ -102,14 +102,14 @@ Create a modern, single-page web application that allows users to find recipes b messages: [ { role: 'user', - content: [{ text: `Below is the user's prompt:\n\n${appPrompt}` }], + content: [{text: `Below is the user's prompt:\n\n${appPrompt}`}], }, { role: 'user', content: [ { text: `Below is the source code of an app generated for the above prompt:\n\n${appFiles - .map((file) => `${file.filePath}:\n\`\`\`\n${file.code}\`\`\``) + .map(file => `${file.filePath}:\n\`\`\`\n${file.code}\`\`\``) .join('\n\n')}`, }, ], diff --git a/runner/progress/dynamic-progress-logger.ts b/runner/progress/dynamic-progress-logger.ts index 2c7da6e..6e7e9a6 100644 --- a/runner/progress/dynamic-progress-logger.ts +++ b/runner/progress/dynamic-progress-logger.ts @@ -1,12 +1,8 @@ -import { MultiBar, SingleBar, Presets } from 'cli-progress'; +import {MultiBar, SingleBar, Presets} from 'cli-progress'; import chalk from 'chalk'; -import { RootPromptDefinition } from '../shared-interfaces.js'; -import { - ProgressLogger, - ProgressType, - progressTypeToIcon, -} from './progress-logger.js'; -import { redX } from '../reporting/format.js'; +import {RootPromptDefinition} from '../shared-interfaces.js'; +import {ProgressLogger, ProgressType, progressTypeToIcon} from './progress-logger.js'; +import {redX} from '../reporting/format.js'; const PREFIX_WIDTH = 20; @@ -46,7 +42,7 @@ export class DynamicProgressLogger implements ProgressLogger { ...Presets.rect, // Use a character so the bar is visible while it's empty. barIncompleteChar: '_', - } + }, ); // Bar that tracks how many prompts are completed in total. @@ -88,12 +84,7 @@ export class DynamicProgressLogger implements ProgressLogger { } } - log( - prompt: RootPromptDefinition, - type: ProgressType, - message: string, - details?: string - ): void { + log(prompt: RootPromptDefinition, type: ProgressType, message: string, details?: string): void { if (!this.wrapper || !this.totalBar) { return; } @@ -113,14 +104,11 @@ export class DynamicProgressLogger implements ProgressLogger { // Capture errors for static printing once the dynamic progress is hidden. if (type === 'error') { - this.errors.push({ prompt, message, details }); + this.errors.push({prompt, message, details}); } // Pad/trim the name so they're all the same length. - const name = this.trimString( - prompt.name.padEnd(PREFIX_WIDTH, ' '), - PREFIX_WIDTH - ); + const name = this.trimString(prompt.name.padEnd(PREFIX_WIDTH, ' '), PREFIX_WIDTH); const payload = { name: `${this.getColorFunction(type)(name)}`, message: `${progressTypeToIcon(type)} ${this.trimString(message, 100)}`, @@ -151,8 +139,6 @@ export class DynamicProgressLogger implements ProgressLogger { } private trimString(value: string, maxLength: number): string { - return value.length > maxLength - ? value.slice(0, maxLength - 1) + '…' - : value; + return value.length > maxLength ? value.slice(0, maxLength - 1) + '…' : value; } } diff --git a/runner/progress/noop-progress-logger.ts b/runner/progress/noop-progress-logger.ts index e414817..1b0f815 100644 --- a/runner/progress/noop-progress-logger.ts +++ b/runner/progress/noop-progress-logger.ts @@ -1,4 +1,4 @@ -import { ProgressLogger } from './progress-logger.js'; +import {ProgressLogger} from './progress-logger.js'; /** A noop progress logger */ export class NoopProgressLogger implements ProgressLogger { diff --git a/runner/progress/progress-logger.ts b/runner/progress/progress-logger.ts index 5b525b1..820c7a6 100644 --- a/runner/progress/progress-logger.ts +++ b/runner/progress/progress-logger.ts @@ -1,5 +1,5 @@ -import { greenCheckmark, redX } from '../reporting/format.js'; -import { RootPromptDefinition } from '../shared-interfaces.js'; +import {greenCheckmark, redX} from '../reporting/format.js'; +import {RootPromptDefinition} from '../shared-interfaces.js'; /** Possible progress event types. */ export type ProgressType = @@ -50,10 +50,5 @@ export interface ProgressLogger { * @param message Message associated with the event. * @param details Additional information about the event. */ - log( - prompt: RootPromptDefinition, - type: ProgressType, - message: string, - details?: string - ): void; + log(prompt: RootPromptDefinition, type: ProgressType, message: string, details?: string): void; } diff --git a/runner/progress/text-progress-logger.ts b/runner/progress/text-progress-logger.ts index d55f26c..3ecd959 100644 --- a/runner/progress/text-progress-logger.ts +++ b/runner/progress/text-progress-logger.ts @@ -1,9 +1,5 @@ -import { RootPromptDefinition } from '../shared-interfaces.js'; -import { - ProgressLogger, - ProgressType, - progressTypeToIcon, -} from './progress-logger.js'; +import {RootPromptDefinition} from '../shared-interfaces.js'; +import {ProgressLogger, ProgressType, progressTypeToIcon} from './progress-logger.js'; /** A progress logger that logs the progression as a flat stream of text. */ export class TextProgressLogger implements ProgressLogger { @@ -19,12 +15,7 @@ export class TextProgressLogger implements ProgressLogger { this.total = this.done = 0; } - log( - prompt: RootPromptDefinition, - type: ProgressType, - message: string, - details?: string - ): void { + log(prompt: RootPromptDefinition, type: ProgressType, message: string, details?: string): void { const icon = progressTypeToIcon(type); if (type === 'done') { diff --git a/runner/ratings/autoraters/auto-rate-shared.ts b/runner/ratings/autoraters/auto-rate-shared.ts index b640211..0f3ef7f 100644 --- a/runner/ratings/autoraters/auto-rate-shared.ts +++ b/runner/ratings/autoraters/auto-rate-shared.ts @@ -1,4 +1,4 @@ -import { Usage } from '../../shared-interfaces.js'; +import {Usage} from '../../shared-interfaces.js'; /** Maximum rating that the LLM can assign. */ export const MAX_RATING = 10; @@ -9,7 +9,7 @@ export interface AutoRateResult { usage: Usage; details: { summary: string; - categories: { name: string; message: string }[]; + categories: {name: string; message: string}[]; }; } diff --git a/runner/ratings/autoraters/code-rater.ts b/runner/ratings/autoraters/code-rater.ts index 15c67ca..3e01696 100644 --- a/runner/ratings/autoraters/code-rater.ts +++ b/runner/ratings/autoraters/code-rater.ts @@ -1,21 +1,17 @@ -import { readFileSync } from 'node:fs'; -import { z } from 'zod'; -import { prepareContextFilesMessage } from '../../orchestration/codegen.js'; -import { Environment } from '../../configuration/environment.js'; +import {readFileSync} from 'node:fs'; +import {z} from 'zod'; +import {prepareContextFilesMessage} from '../../orchestration/codegen.js'; +import {Environment} from '../../configuration/environment.js'; import { IndividualAssessment, IndividualAssessmentState, LlmResponseFile, SkippedIndividualAssessment, } from '../../shared-interfaces.js'; -import { - AutoRateResult, - getCoefficient, - MAX_RATING, -} from './auto-rate-shared.js'; -import { GenkitRunner } from '../../codegen/genkit/genkit-runner.js'; +import {AutoRateResult, getCoefficient, MAX_RATING} from './auto-rate-shared.js'; +import {GenkitRunner} from '../../codegen/genkit/genkit-runner.js'; import defaultCodeRaterPrompt from './code-rating-prompt.js'; -import { RatingsResult } from '../rating-types.js'; +import {RatingsResult} from '../rating-types.js'; /** Framework-specific hints for the rating prompt. */ const FW_HINTS: Record = { @@ -48,13 +44,13 @@ export async function autoRateCode( environment: Environment, files: LlmResponseFile[], appPrompt: string, - ratingsResult: RatingsResult + ratingsResult: RatingsResult, ): Promise { const contextMessage = prepareContextFilesMessage( - files.map((o) => ({ + files.map(o => ({ relativePath: o.filePath, content: o.code, - })) + })), ); let promptText: string; @@ -62,7 +58,7 @@ export async function autoRateCode( if (environment.codeRatingPromptPath) { CACHED_RATING_PROMPTS[environment.codeRatingPromptPath] ??= readFileSync( environment.codeRatingPromptPath, - 'utf8' + 'utf8', ); promptText = CACHED_RATING_PROMPTS[environment.codeRatingPromptPath]; } else { @@ -78,16 +74,11 @@ export async function autoRateCode( ? JSON.stringify(safetyRating, null, 2) : ''; - const prompt = environment.renderPrompt( - promptText, - environment.codeRatingPromptPath, - { - APP_PROMPT: appPrompt, - FRAMEWORK_SPECIFIC_HINTS: - FW_HINTS[environment.fullStackFramework.id] ?? '', - SAFETY_WEB_RESULTS_JSON: safetyWebResultsJson, - } - ).result; + const prompt = environment.renderPrompt(promptText, environment.codeRatingPromptPath, { + APP_PROMPT: appPrompt, + FRAMEWORK_SPECIFIC_HINTS: FW_HINTS[environment.fullStackFramework.id] ?? '', + SAFETY_WEB_RESULTS_JSON: safetyWebResultsJson, + }).result; const result = await llm.generateConstrained({ abortSignal, @@ -96,15 +87,13 @@ export async function autoRateCode( prompt, skipMcp: true, schema: z.object({ - rating: z - .number() - .describe(`Rating from 1-${MAX_RATING}. Best is ${MAX_RATING}.`), + rating: z.number().describe(`Rating from 1-${MAX_RATING}. Best is ${MAX_RATING}.`), summary: z.string().describe('Summary of the overall code quality.'), categories: z.array( z.object({ name: z.string().describe('Category name'), message: z.string().describe('Short description of the problem.'), - }) + }), ), }), }); diff --git a/runner/ratings/autoraters/rate-files.ts b/runner/ratings/autoraters/rate-files.ts index e5453f2..e11724e 100644 --- a/runner/ratings/autoraters/rate-files.ts +++ b/runner/ratings/autoraters/rate-files.ts @@ -1,15 +1,15 @@ -import { greenCheckmark } from '../../reporting/format.js'; +import {greenCheckmark} from '../../reporting/format.js'; import { AutoraterRunInfo, IndividualAssessment, LlmResponseFile, SkippedIndividualAssessment, } from '../../shared-interfaces.js'; -import { autoRateCode } from './code-rater.js'; -import { autoRateAppearance } from './visuals-rater.js'; -import { Environment } from '../../configuration/environment.js'; -import { GenkitRunner } from '../../codegen/genkit/genkit-runner.js'; -import { RatingsResult } from '../rating-types.js'; +import {autoRateCode} from './code-rater.js'; +import {autoRateAppearance} from './visuals-rater.js'; +import {Environment} from '../../configuration/environment.js'; +import {GenkitRunner} from '../../codegen/genkit/genkit-runner.js'; +import {RatingsResult} from '../rating-types.js'; /** * Automatically rates the code inside of a file. @@ -29,7 +29,7 @@ export async function autoRateFiles( files: LlmResponseFile[], appPrompt: string, screenshotPngUrl: string | null, - ratingsResult: RatingsResult + ratingsResult: RatingsResult, ): Promise { console.log(`Autorater is using '${model}' model. \n`); @@ -42,7 +42,7 @@ export async function autoRateFiles( environment, files, appPrompt, - ratingsResult + ratingsResult, ); console.log(`${greenCheckmark()} Code scoring is successful.`); @@ -57,7 +57,7 @@ export async function autoRateFiles( environment, appPrompt, screenshotPngUrl, - 'command-line' + 'command-line', ); console.log(`${greenCheckmark()} Visual scoring is successful.`); } diff --git a/runner/ratings/autoraters/visuals-rater.ts b/runner/ratings/autoraters/visuals-rater.ts index 9cdbc84..e1b77d6 100644 --- a/runner/ratings/autoraters/visuals-rater.ts +++ b/runner/ratings/autoraters/visuals-rater.ts @@ -1,14 +1,10 @@ -import { z } from 'zod'; -import { PromptDataMessage } from '../../codegen/llm-runner.js'; -import { - AutoRateResult, - getCoefficient, - MAX_RATING, -} from './auto-rate-shared.js'; -import { GenkitRunner } from '../../codegen/genkit/genkit-runner.js'; +import {z} from 'zod'; +import {PromptDataMessage} from '../../codegen/llm-runner.js'; +import {AutoRateResult, getCoefficient, MAX_RATING} from './auto-rate-shared.js'; +import {GenkitRunner} from '../../codegen/genkit/genkit-runner.js'; import defaultVisualRaterPrompt from './visual-rating-prompt.js'; -import { Environment } from '../../configuration/environment.js'; -import { screenshotUrlToPngBuffer } from '../../utils/screenshots.js'; +import {Environment} from '../../configuration/environment.js'; +import {screenshotUrlToPngBuffer} from '../../utils/screenshots.js'; /** * Automatically rate the appearance of a screenshot using an LLM. @@ -27,7 +23,7 @@ export async function autoRateAppearance( environment: Environment, appPrompt: string, screenshotPngUrl: string, - label: string + label: string, ): Promise { const prompt = environment.renderPrompt(defaultVisualRaterPrompt, null, { APP_PROMPT: appPrompt, @@ -39,9 +35,7 @@ export async function autoRateAppearance( content: [ { media: { - base64PngImage: ( - await screenshotUrlToPngBuffer(screenshotPngUrl) - ).toString('base64'), + base64PngImage: (await screenshotUrlToPngBuffer(screenshotPngUrl)).toString('base64'), url: screenshotPngUrl, }, }, @@ -60,19 +54,15 @@ export async function autoRateAppearance( durationInMins: 2.5, }, schema: z.object({ - rating: z - .number() - .describe(`Rating from 1-${MAX_RATING}. Best is ${MAX_RATING}.`), + rating: z.number().describe(`Rating from 1-${MAX_RATING}. Best is ${MAX_RATING}.`), summary: z .string() - .describe( - 'Summary of the overall app, talking about concrete features, super concise.' - ), + .describe('Summary of the overall app, talking about concrete features, super concise.'), categories: z.array( z.object({ name: z.string().describe('Category name'), message: z.string().describe('Short description of what is missing.'), - }) + }), ), }), }); diff --git a/runner/ratings/built-in-ratings/axe-rating.ts b/runner/ratings/built-in-ratings/axe-rating.ts index 7ac4d14..dc74ed7 100644 --- a/runner/ratings/built-in-ratings/axe-rating.ts +++ b/runner/ratings/built-in-ratings/axe-rating.ts @@ -1,10 +1,5 @@ -import { Result } from 'axe-core'; -import { - PerBuildRating, - RatingCategory, - RatingKind, - RatingState, -} from '../rating-types.js'; +import {Result} from 'axe-core'; +import {PerBuildRating, RatingCategory, RatingKind, RatingState} from '../rating-types.js'; // Define the scoring weights for each violation impact level as a coefficient penalty. const IMPACT_COEFFICIENTS = { @@ -27,7 +22,7 @@ export const axeRating: PerBuildRating = { category: RatingCategory.MEDIUM_IMPACT, id: 'axe-a11y', scoreReduction: '10%', - rate: ({ serveResult, axeRepairAttempts }) => { + rate: ({serveResult, axeRepairAttempts}) => { const violations = serveResult?.axeViolations as Result[] | undefined; // Start with a perfect score. let coefficient = 1.0; @@ -50,9 +45,7 @@ export const axeRating: PerBuildRating = { const formattedViolations = violations .map((v, i) => formatAxeViolation(v, i, violations.length)) .join('\n\n'); - message += `Found ${ - violations.length - } accessibility violations:\n\n${formattedViolations}`; + message += `Found ${violations.length} accessibility violations:\n\n${formattedViolations}`; } // Apply penalties for repair attempts. @@ -77,11 +70,7 @@ export const axeRating: PerBuildRating = { /** * Formats a single Axe violation into a more concise, readable string. */ -function formatAxeViolation( - violation: Result, - index: number, - total: number -): string { +function formatAxeViolation(violation: Result, index: number, total: number): string { // Consolidate all violating selectors into a single line for brevity. const violationNum = total > 1 ? `${index + 1}.` : ''; const firstNodeHtml = violation.nodes[0]?.html; diff --git a/runner/ratings/built-in-ratings/code-quality-rating.ts b/runner/ratings/built-in-ratings/code-quality-rating.ts index 6cc5f95..2077c3e 100644 --- a/runner/ratings/built-in-ratings/code-quality-rating.ts +++ b/runner/ratings/built-in-ratings/code-quality-rating.ts @@ -1,10 +1,5 @@ -import { autoRateCode } from '../autoraters/code-rater.js'; -import { - LLMBasedRating, - RatingKind, - RatingCategory, - RatingState, -} from '../rating-types.js'; +import {autoRateCode} from '../autoraters/code-rater.js'; +import {LLMBasedRating, RatingKind, RatingCategory, RatingState} from '../rating-types.js'; /** Rating that verifies the generated code quality using an LLM. */ export const codeQualityRating: LLMBasedRating = { @@ -14,15 +9,15 @@ export const codeQualityRating: LLMBasedRating = { category: RatingCategory.MEDIUM_IMPACT, id: 'common-autorater-code-quality', scoreReduction: '30%', - rate: async (ctx) => { - const { coefficient, usage, details } = await autoRateCode( + rate: async ctx => { + const {coefficient, usage, details} = await autoRateCode( ctx.llm, ctx.abortSignal, ctx.model, ctx.environment, ctx.outputFiles, ctx.fullPromptText, - ctx.ratingsResult + ctx.ratingsResult, ); return { diff --git a/runner/ratings/built-in-ratings/index.ts b/runner/ratings/built-in-ratings/index.ts index a58eda3..c11bd40 100644 --- a/runner/ratings/built-in-ratings/index.ts +++ b/runner/ratings/built-in-ratings/index.ts @@ -1,16 +1,13 @@ -export { axeRating } from './axe-rating.js'; -export { codeQualityRating } from './code-quality-rating.js'; -export { NoInnerHtmlBindingsRating } from './no-inner-html-bindings-rating.js'; -export { NoDangerouslySetInnerHtmlRating } from './no-dangerously-set-inner-html-rating.js'; -export { noRuntimeExceptionsRating } from './no-runtime-errors-rating.js'; -export { safetyWebRating } from './safety-web-rating.js'; -export { successfulBuildRating } from './successful-build-rating.js'; -export { sufficientCodeSizeRating } from './sufficient-code-size-rating.js'; -export { sufficientGeneratedFilesRating } from './sufficient-generated-files-rating.js'; -export { userJourneysRating } from './user-journeys-rating.js'; -export { validCssRating } from './valid-css-rating.js'; -export { visualAppearanceRating } from './visual-appearance-rating.js'; -export { - cspViolationsRating, - trustedTypesViolationsRating, -} from './security-ratings.js'; +export {axeRating} from './axe-rating.js'; +export {codeQualityRating} from './code-quality-rating.js'; +export {NoInnerHtmlBindingsRating} from './no-inner-html-bindings-rating.js'; +export {NoDangerouslySetInnerHtmlRating} from './no-dangerously-set-inner-html-rating.js'; +export {noRuntimeExceptionsRating} from './no-runtime-errors-rating.js'; +export {safetyWebRating} from './safety-web-rating.js'; +export {successfulBuildRating} from './successful-build-rating.js'; +export {sufficientCodeSizeRating} from './sufficient-code-size-rating.js'; +export {sufficientGeneratedFilesRating} from './sufficient-generated-files-rating.js'; +export {userJourneysRating} from './user-journeys-rating.js'; +export {validCssRating} from './valid-css-rating.js'; +export {visualAppearanceRating} from './visual-appearance-rating.js'; +export {cspViolationsRating, trustedTypesViolationsRating} from './security-ratings.js'; diff --git a/runner/ratings/built-in-ratings/no-inner-html-bindings-rating.ts b/runner/ratings/built-in-ratings/no-inner-html-bindings-rating.ts index ee5c729..36a8e05 100644 --- a/runner/ratings/built-in-ratings/no-inner-html-bindings-rating.ts +++ b/runner/ratings/built-in-ratings/no-inner-html-bindings-rating.ts @@ -24,7 +24,7 @@ export const NoInnerHtmlBindingsRating: PerFileRating = { rate: async (code, filePath) => { const matches = [...code.matchAll(ANGULAR_BINDING_REGEX)]; if (matches.length > 0) { - const violations = matches.map((match) => `Binding to "[${match[1]}]"`); + const violations = matches.map(match => `Binding to "[${match[1]}]"`); return { rating: 0, errorMessage: `Found security vulnerabilities in ${filePath}:\n- ${violations.join('\n- ')}`, diff --git a/runner/ratings/built-in-ratings/no-runtime-errors-rating.ts b/runner/ratings/built-in-ratings/no-runtime-errors-rating.ts index 4efe110..53068a6 100644 --- a/runner/ratings/built-in-ratings/no-runtime-errors-rating.ts +++ b/runner/ratings/built-in-ratings/no-runtime-errors-rating.ts @@ -1,10 +1,5 @@ -import { BuildResultStatus } from '../../workers/builder/builder-types.js'; -import { - PerBuildRating, - RatingKind, - RatingCategory, - RatingState, -} from '../rating-types.js'; +import {BuildResultStatus} from '../../workers/builder/builder-types.js'; +import {PerBuildRating, RatingKind, RatingCategory, RatingState} from '../rating-types.js'; /** Rating which verifies that there are no runtime errors. */ export const noRuntimeExceptionsRating: PerBuildRating = { @@ -14,7 +9,7 @@ export const noRuntimeExceptionsRating: PerBuildRating = { category: RatingCategory.HIGH_IMPACT, scoreReduction: '50%', id: 'common-no-runtime-errors', - rate: ({ buildResult, serveResult }) => ({ + rate: ({buildResult, serveResult}) => ({ state: RatingState.EXECUTED, coefficient: // If we can't build - we can't run it as well. diff --git a/runner/ratings/built-in-ratings/safety-web-rating.ts b/runner/ratings/built-in-ratings/safety-web-rating.ts index 5494868..106038b 100644 --- a/runner/ratings/built-in-ratings/safety-web-rating.ts +++ b/runner/ratings/built-in-ratings/safety-web-rating.ts @@ -1,9 +1,4 @@ -import { - PerBuildRating, - RatingCategory, - RatingKind, - RatingState, -} from '../rating-types.js'; +import {PerBuildRating, RatingCategory, RatingKind, RatingState} from '../rating-types.js'; /** * A rating that assesses the code based on SafetyWeb violations found. @@ -15,7 +10,7 @@ export const safetyWebRating: PerBuildRating = { category: RatingCategory.HIGH_IMPACT, id: 'safety-web', scoreReduction: '50%', - rate: ({ buildResult }) => { + rate: ({buildResult}) => { // There should only be one package-- the generated app. const violations = buildResult.safetyWebReportJson?.[0]?.violations; @@ -37,12 +32,8 @@ export const safetyWebRating: PerBuildRating = { // Subtract from a starting coefficient of 1 based on the impact of each violation. let coefficient = 1.0 - violations.length * 0.1; - const formattedViolations = violations - .map((v, i) => v.ruleId + ' - ') - .join('\n\n'); - const message = `Found ${ - violations.length - } safety-web violations:\n\n${formattedViolations}`; + const formattedViolations = violations.map((v, i) => v.ruleId + ' - ').join('\n\n'); + const message = `Found ${violations.length} safety-web violations:\n\n${formattedViolations}`; return { state: RatingState.EXECUTED, diff --git a/runner/ratings/built-in-ratings/security-ratings.ts b/runner/ratings/built-in-ratings/security-ratings.ts index 259ecc6..f9eb26d 100644 --- a/runner/ratings/built-in-ratings/security-ratings.ts +++ b/runner/ratings/built-in-ratings/security-ratings.ts @@ -1,10 +1,5 @@ -import { - PerBuildRating, - RatingCategory, - RatingKind, - RatingState, -} from '../rating-types.js'; -import { CspViolation } from '../../workers/serve-testing/auto-csp-types.js'; +import {PerBuildRating, RatingCategory, RatingKind, RatingState} from '../rating-types.js'; +import {CspViolation} from '../../workers/serve-testing/auto-csp-types.js'; /** * Formats an array of CSP violations into a readable string for the report. @@ -17,7 +12,7 @@ function formatViolations(violations: CspViolation[]): string { } return violations .map( - (v) => + v => `- Violated Directive: ${v['violated-directive']} ` + ` Source File: ${v['source-file']}:${v['line-number']} @@ -28,7 +23,7 @@ function formatViolations(violations: CspViolation[]): string { --- ${v.codeSnippet || v['script-sample'] || 'Not available'} --- -` +`, ) .join('\n\n'); } @@ -40,12 +35,11 @@ ${v.codeSnippet || v['script-sample'] || 'Not available'} export const cspViolationsRating: PerBuildRating = { kind: RatingKind.PER_BUILD, name: 'CSP Violations', - description: - 'Checks for Content Security Policy violations, excluding Trusted Types.', + description: 'Checks for Content Security Policy violations, excluding Trusted Types.', id: 'csp-violations', category: RatingCategory.HIGH_IMPACT, scoreReduction: '50%', - rate: ({ serveResult }) => { + rate: ({serveResult}) => { if (!serveResult?.cspViolations) { return { state: RatingState.SKIPPED, @@ -54,7 +48,7 @@ export const cspViolationsRating: PerBuildRating = { } const violations = serveResult.cspViolations?.filter( - (v) => v['violated-directive'] !== 'require-trusted-types-for' + v => v['violated-directive'] !== 'require-trusted-types-for', ); if (!violations || violations.length === 0) { return { @@ -85,7 +79,7 @@ export const trustedTypesViolationsRating: PerBuildRating = { id: 'trusted-types-violations', category: RatingCategory.HIGH_IMPACT, scoreReduction: '50%', - rate: ({ serveResult }) => { + rate: ({serveResult}) => { if (!serveResult?.cspViolations) { return { state: RatingState.SKIPPED, @@ -94,7 +88,7 @@ export const trustedTypesViolationsRating: PerBuildRating = { } const violations = serveResult?.cspViolations?.filter( - (v) => v['violated-directive'] === 'require-trusted-types-for' + v => v['violated-directive'] === 'require-trusted-types-for', ); if (!violations || violations.length === 0) { diff --git a/runner/ratings/built-in-ratings/successful-build-rating.ts b/runner/ratings/built-in-ratings/successful-build-rating.ts index 1b1146c..ec87913 100644 --- a/runner/ratings/built-in-ratings/successful-build-rating.ts +++ b/runner/ratings/built-in-ratings/successful-build-rating.ts @@ -1,10 +1,5 @@ -import { BuildResultStatus } from '../../workers/builder/builder-types.js'; -import { - PerBuildRating, - RatingKind, - RatingCategory, - RatingState, -} from '../rating-types.js'; +import {BuildResultStatus} from '../../workers/builder/builder-types.js'; +import {PerBuildRating, RatingKind, RatingCategory, RatingState} from '../rating-types.js'; /** Rating which verifies that the application builds successfully. */ export const successfulBuildRating: PerBuildRating = { @@ -15,11 +10,8 @@ export const successfulBuildRating: PerBuildRating = { category: RatingCategory.HIGH_IMPACT, scoreReduction: '50%', // Reduce the amount of points in case we've built the code with a few repair attempts. - rate: ({ buildResult, repairAttempts }) => ({ + rate: ({buildResult, repairAttempts}) => ({ state: RatingState.EXECUTED, - coefficient: - buildResult.status === BuildResultStatus.ERROR - ? 0 - : 1 / (repairAttempts + 1), + coefficient: buildResult.status === BuildResultStatus.ERROR ? 0 : 1 / (repairAttempts + 1), }), }; diff --git a/runner/ratings/built-in-ratings/sufficient-code-size-rating.ts b/runner/ratings/built-in-ratings/sufficient-code-size-rating.ts index 32d4d30..859d0d6 100644 --- a/runner/ratings/built-in-ratings/sufficient-code-size-rating.ts +++ b/runner/ratings/built-in-ratings/sufficient-code-size-rating.ts @@ -8,8 +8,7 @@ import { /** Rating that verifies that the LLM didn't generate empty files. */ export const sufficientCodeSizeRating: PerFileRating = { name: 'Sufficient Code Size (over 50b)', - description: - 'Ensures the generated code is not trivially small (e.g. < 50b).', + description: 'Ensures the generated code is not trivially small (e.g. < 50b).', category: RatingCategory.HIGH_IMPACT, id: 'common-generated-code-size', scoreReduction: '30%', diff --git a/runner/ratings/built-in-ratings/sufficient-generated-files-rating.ts b/runner/ratings/built-in-ratings/sufficient-generated-files-rating.ts index 808cbe3..960929b 100644 --- a/runner/ratings/built-in-ratings/sufficient-generated-files-rating.ts +++ b/runner/ratings/built-in-ratings/sufficient-generated-files-rating.ts @@ -1,9 +1,4 @@ -import { - PerBuildRating, - RatingCategory, - RatingKind, - RatingState, -} from '../rating-types.js'; +import {PerBuildRating, RatingCategory, RatingKind, RatingState} from '../rating-types.js'; /** Rating which verifies that the LLM produced at least one file. */ export const sufficientGeneratedFilesRating: PerBuildRating = { @@ -13,7 +8,7 @@ export const sufficientGeneratedFilesRating: PerBuildRating = { id: 'common-generated-file-count', scoreReduction: '100%', kind: RatingKind.PER_BUILD, - rate: ({ generatedFileCount }) => ({ + rate: ({generatedFileCount}) => ({ state: RatingState.EXECUTED, coefficient: generatedFileCount > 0 ? 1 : 0, }), diff --git a/runner/ratings/built-in-ratings/user-journeys-rating.ts b/runner/ratings/built-in-ratings/user-journeys-rating.ts index 5e9de28..57f4bc9 100644 --- a/runner/ratings/built-in-ratings/user-journeys-rating.ts +++ b/runner/ratings/built-in-ratings/user-journeys-rating.ts @@ -1,20 +1,14 @@ -import { - PerBuildRating, - RatingKind, - RatingCategory, - RatingState, -} from '../rating-types.js'; +import {PerBuildRating, RatingKind, RatingCategory, RatingState} from '../rating-types.js'; /** Rating that verifies the interactivity of the generated app. */ export const userJourneysRating: PerBuildRating = { id: 'user-journey-tests', name: 'User Journey validation', - description: - 'Ensures that all User Journeys are working in the generated app', + description: 'Ensures that all User Journeys are working in the generated app', kind: RatingKind.PER_BUILD, category: RatingCategory.MEDIUM_IMPACT, scoreReduction: '30%', - rate: ({ serveResult }) => { + rate: ({serveResult}) => { if (serveResult === null || serveResult.userJourneyAgentOutput === null) { return { state: RatingState.SKIPPED, @@ -40,23 +34,23 @@ export const userJourneysRating: PerBuildRating = { }; } - const failingCount = output.analysis.filter((c) => c.passing).length; + const failingCount = output.analysis.filter(c => c.passing).length; const percentagePassing = failingCount / output.analysis.length; let message: string; if (percentagePassing === 1) { - message = `All validations passed.\n${output.analysis.map((c) => `- ${c.journey}`).join('\n')}`; + message = `All validations passed.\n${output.analysis.map(c => `- ${c.journey}`).join('\n')}`; } else { const failureMsg = output.analysis .map( - (c) => + c => `- ${c.journey}${ c.passing ? '' : `(Failing)\n Expected: ${c.failure?.expected} Observed: ${c.failure?.observed}` - }` + }`, ) .join('\n'); message = `${failingCount}/${output.analysis.length} passed.\n${failureMsg}`; diff --git a/runner/ratings/built-in-ratings/valid-css-rating.ts b/runner/ratings/built-in-ratings/valid-css-rating.ts index 3d12a02..59f4c60 100644 --- a/runner/ratings/built-in-ratings/valid-css-rating.ts +++ b/runner/ratings/built-in-ratings/valid-css-rating.ts @@ -15,7 +15,7 @@ export const validCssRating: PerFileRating = { kind: RatingKind.PER_FILE, id: 'common-valid-css', filter: PerFileRatingContentType.CSS, - rate: async (code) => { + rate: async code => { const linterResult = await stylelint.lint({ code: code, cwd: import.meta.dirname, @@ -53,8 +53,7 @@ export const validCssRating: PerFileRating = { // One file processed produces one result. const lintResult = linterResult.results[0]; - const warningCount = - lintResult.warnings.length + lintResult.deprecations.length; + const warningCount = lintResult.warnings.length + lintResult.deprecations.length; if (warningCount == 0) { return 1; diff --git a/runner/ratings/built-in-ratings/visual-appearance-rating.ts b/runner/ratings/built-in-ratings/visual-appearance-rating.ts index c07cd85..d8fa6f3 100644 --- a/runner/ratings/built-in-ratings/visual-appearance-rating.ts +++ b/runner/ratings/built-in-ratings/visual-appearance-rating.ts @@ -1,27 +1,18 @@ -import { TimeoutError } from 'puppeteer'; -import { AutoRateResult } from '../autoraters/auto-rate-shared.js'; -import { autoRateAppearance } from '../autoraters/visuals-rater.js'; -import { - LLMBasedRating, - RatingKind, - RatingCategory, - RatingState, -} from '../rating-types.js'; +import {TimeoutError} from 'puppeteer'; +import {AutoRateResult} from '../autoraters/auto-rate-shared.js'; +import {autoRateAppearance} from '../autoraters/visuals-rater.js'; +import {LLMBasedRating, RatingKind, RatingCategory, RatingState} from '../rating-types.js'; /** Rating which verifies the appearance of the generated app using an LLM. */ export const visualAppearanceRating: LLMBasedRating = { kind: RatingKind.LLM_BASED, name: 'UI & Visual appearance (LLM-Rated)', - description: - 'Rates the app based on its visuals (UI visuals and feature completeness).', + description: 'Rates the app based on its visuals (UI visuals and feature completeness).', category: RatingCategory.MEDIUM_IMPACT, scoreReduction: '30%', id: 'common-autorater-visuals', - rate: async (ctx) => { - if ( - ctx.serveTestingResult === null || - ctx.serveTestingResult.screenshotPngUrl === undefined - ) { + rate: async ctx => { + if (ctx.serveTestingResult === null || ctx.serveTestingResult.screenshotPngUrl === undefined) { return { state: RatingState.SKIPPED, message: 'No screenshot available', @@ -38,7 +29,7 @@ export const visualAppearanceRating: LLMBasedRating = { ctx.environment, ctx.fullPromptText, ctx.serveTestingResult.screenshotPngUrl, - ctx.currentPromptDef.name + ctx.currentPromptDef.name, ); } catch (e) { if (e instanceof TimeoutError) { diff --git a/runner/ratings/built-in.ts b/runner/ratings/built-in.ts index c07264a..57f37b9 100644 --- a/runner/ratings/built-in.ts +++ b/runner/ratings/built-in.ts @@ -1,4 +1,4 @@ -import { Rating } from './rating-types.js'; +import {Rating} from './rating-types.js'; import { successfulBuildRating, noRuntimeExceptionsRating, diff --git a/runner/ratings/embedded-languages.ts b/runner/ratings/embedded-languages.ts index 18f65ef..4d02c4e 100644 --- a/runner/ratings/embedded-languages.ts +++ b/runner/ratings/embedded-languages.ts @@ -1,5 +1,5 @@ import ts from 'typescript'; -import { LlmResponseFile } from '../shared-interfaces.js'; +import {LlmResponseFile} from '../shared-interfaces.js'; /** * Extracts embedded stylesheets and HTML from a TypeScript file. @@ -12,11 +12,7 @@ export function extractEmbeddedCodeFromTypeScript(file: LlmResponseFile) { return null; } - const sourceFile = ts.createSourceFile( - 'temp.ts', - file.code, - ts.ScriptTarget.Latest - ); + const sourceFile = ts.createSourceFile('temp.ts', file.code, ts.ScriptTarget.Latest); const stylesheets: string[] = []; const templates: string[] = []; @@ -35,10 +31,7 @@ export function extractEmbeddedCodeFromTypeScript(file: LlmResponseFile) { continue; } - if ( - prop.name.text === 'template' && - ts.isStringLiteralLike(prop.initializer) - ) { + if (prop.name.text === 'template' && ts.isStringLiteralLike(prop.initializer)) { templates.push(prop.initializer.text); } else if (prop.name.text === 'styles') { if (ts.isStringLiteralLike(prop.initializer)) { @@ -58,7 +51,7 @@ export function extractEmbeddedCodeFromTypeScript(file: LlmResponseFile) { }); return { - stylesheets: stylesheets.map((c) => ({ code: c, filePath: file.filePath })), - templates: templates.map((c) => ({ code: c, filePath: file.filePath })), + stylesheets: stylesheets.map(c => ({code: c, filePath: file.filePath})), + templates: templates.map(c => ({code: c, filePath: file.filePath})), }; } diff --git a/runner/ratings/rate-code.ts b/runner/ratings/rate-code.ts index a4f38fc..99d0874 100644 --- a/runner/ratings/rate-code.ts +++ b/runner/ratings/rate-code.ts @@ -1,5 +1,5 @@ -import { BuildResult } from '../workers/builder/builder-types.js'; -import { extname } from 'path'; +import {BuildResult} from '../workers/builder/builder-types.js'; +import {extname} from 'path'; import { IndividualAssessment, CodeAssessmentScore, @@ -22,12 +22,12 @@ import { CATEGORY_NAMES, RatingsResult, } from './rating-types.js'; -import { extractEmbeddedCodeFromTypeScript } from './embedded-languages.js'; -import { Environment } from '../configuration/environment.js'; -import { GenkitRunner } from '../codegen/genkit/genkit-runner.js'; -import { ProgressLogger } from '../progress/progress-logger.js'; -import { UserFacingError } from '../utils/errors.js'; -import { ServeTestingResult } from '../workers/serve-testing/worker-types.js'; +import {extractEmbeddedCodeFromTypeScript} from './embedded-languages.js'; +import {Environment} from '../configuration/environment.js'; +import {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; +import {ProgressLogger} from '../progress/progress-logger.js'; +import {UserFacingError} from '../utils/errors.js'; +import {ServeTestingResult} from '../workers/serve-testing/worker-types.js'; interface FileOrEmbeddedSyntheticFile { /** @@ -41,10 +41,7 @@ interface FileOrEmbeddedSyntheticFile { code: string; } -type CategorizedFiles = Record< - PerFileRatingContentType, - FileOrEmbeddedSyntheticFile[] ->; +type CategorizedFiles = Record; export async function rateGeneratedCode( llm: GenkitRunner, @@ -58,7 +55,7 @@ export async function rateGeneratedCode( axeRepairAttempts: number, abortSignal: AbortSignal, progress: ProgressLogger, - autoraterModel: string + autoraterModel: string, ): Promise { let categorizedFiles: CategorizedFiles | null = null; let totalPoints = 0; @@ -78,7 +75,7 @@ export async function rateGeneratedCode( RatingCategory.HIGH_IMPACT, RatingCategory.MEDIUM_IMPACT, RatingCategory.LOW_IMPACT, - ].map((category) => ({ + ].map(category => ({ id: category, name: CATEGORY_NAMES[category], points: 0, @@ -98,15 +95,11 @@ export async function rateGeneratedCode( repairAttempts, outputFiles.length, axeRepairAttempts, - ratingsResult + ratingsResult, ); } else if (current.kind === RatingKind.PER_FILE) { categorizedFiles ??= splitFilesIntoCategories(outputFiles); - result = await runPerFileRating( - current, - categorizedFiles, - ratingsResult - ); + result = await runPerFileRating(current, categorizedFiles, ratingsResult); } else if (current.kind === RatingKind.LLM_BASED) { result = await runLlmBasedRating( environment, @@ -121,16 +114,13 @@ export async function rateGeneratedCode( axeRepairAttempts, abortSignal, autoraterModel, - ratingsResult + ratingsResult, ); } else { throw new UserFacingError(`Unsupported rating type ${current}`); } } catch (error) { - result = getSkippedAssessment( - current, - `Error during execution:\n${error}` - ); + result = getSkippedAssessment(current, `Error during execution:\n${error}`); } if (result.state === IndividualAssessmentState.EXECUTED && result.usage) { @@ -139,11 +129,11 @@ export async function rateGeneratedCode( tokenUsage.totalTokens += result.usage.totalTokens ?? 0; } - const category = categories.find((c) => c.id === result.category); + const category = categories.find(c => c.id === result.category); if (!category) { throw new UserFacingError( - `Could not find category for rating "${result.id}" with category "${result.category}"` + `Could not find category for rating "${result.id}" with category "${result.category}"`, ); } @@ -158,17 +148,14 @@ export async function rateGeneratedCode( for (const result of category.assessments) { if (result.state === IndividualAssessmentState.EXECUTED) { const reduction = - parsePercentString(result.scoreReduction) * - (1 - result.successPercentage); + parsePercentString(result.scoreReduction) * (1 - result.successPercentage); multiplier = Math.max(0, multiplier - reduction); } } // Round the number to two decimals. // via: https://stackoverflow.com/questions/11832914/how-to-round-to-at-most-2-decimal-places-if-necessary - category.points = - Math.round((category.maxPoints * multiplier + Number.EPSILON) * 100) / - 100; + category.points = Math.round((category.maxPoints * multiplier + Number.EPSILON) * 100) / 100; maxOverallPoints += category.maxPoints; totalPoints += category.points; } @@ -188,7 +175,7 @@ function runPerBuildRating( repairAttempts: number, generatedFileCount: number, axeRepairAttempts: number, - ratingsResult: RatingsResult + ratingsResult: RatingsResult, ): IndividualAssessment | SkippedIndividualAssessment { const rateResult = rating.rate({ buildResult, @@ -206,8 +193,7 @@ function runPerBuildRating( } const message = - getMessage(rateResult.coefficient) + - (rateResult.message ? `\n${rateResult.message}` : ''); + getMessage(rateResult.coefficient) + (rateResult.message ? `\n${rateResult.message}` : ''); return getIndividualAssessment(rating, rateResult.coefficient, message); } @@ -215,7 +201,7 @@ function runPerBuildRating( async function runPerFileRating( rating: PerFileRating, categorizedFiles: CategorizedFiles, - ratingsResult: RatingsResult + ratingsResult: RatingsResult, ): Promise { const errorMessages: string[] = []; let contentType: PerFileRatingContentType; @@ -239,8 +225,7 @@ async function runPerFileRating( for (const file of files) { const matchesFilePattern = contentFilterPattern === null || contentFilterPattern.test(file.code); - const matchesPathPattern = - pathFilterPattern === null || pathFilterPattern.test(file.filePath); + const matchesPathPattern = pathFilterPattern === null || pathFilterPattern.test(file.filePath); if (matchesFilePattern && matchesPathPattern) { // Remove comments from the code to avoid false-detection of bad patterns. @@ -269,11 +254,7 @@ async function runPerFileRating( let message = getMessage(average); if (errorMessages.length) { - message += [ - '', - 'Errors:', - errorMessages.join(`\n ${'-'.repeat(50)} \n`), - ].join('\n'); + message += ['', 'Errors:', errorMessages.join(`\n ${'-'.repeat(50)} \n`)].join('\n'); } return getIndividualAssessment(rating, average, message); @@ -292,7 +273,7 @@ async function runLlmBasedRating( axeRepairAttempts: number, abortSignal: AbortSignal, autoraterModel: string, - ratingsResult: RatingsResult + ratingsResult: RatingsResult, ): Promise { const result = await rating.rate({ environment, @@ -316,11 +297,7 @@ async function runLlmBasedRating( let message = `${getMessage(result.coefficient)}\n${result.details.summary}`; if (result.coefficient < 1) { - message += - ':\n' + - result.details.categories - .map((category) => category.message) - .join('\n '); + message += ':\n' + result.details.categories.map(category => category.message).join('\n '); } return getIndividualAssessment(rating, result.coefficient, message); @@ -329,7 +306,7 @@ async function runLlmBasedRating( function getIndividualAssessment( rating: Rating, rateResult: number, - message: string + message: string, ): IndividualAssessment { return { state: IndividualAssessmentState.EXECUTED, @@ -343,10 +320,7 @@ function getIndividualAssessment( }; } -function getSkippedAssessment( - rating: Rating, - message: string -): SkippedIndividualAssessment { +function getSkippedAssessment(rating: Rating, message: string): SkippedIndividualAssessment { return { state: IndividualAssessmentState.SKIPPED, name: rating.name, @@ -385,9 +359,7 @@ function getMessage(coefficient: number) { return `Partial Pass (${Math.round(coefficient * 100)}%)`; } -function splitFilesIntoCategories( - outputFiles: LlmResponseFile[] -): CategorizedFiles { +function splitFilesIntoCategories(outputFiles: LlmResponseFile[]): CategorizedFiles { const ts: FileOrEmbeddedSyntheticFile[] = []; const css: FileOrEmbeddedSyntheticFile[] = []; const html: FileOrEmbeddedSyntheticFile[] = []; @@ -400,8 +372,7 @@ function splitFilesIntoCategories( all.push(file); if (extension === '.ts' || extension === '.tsx') { - const embedded = - extension === '.ts' ? extractEmbeddedCodeFromTypeScript(file) : null; + const embedded = extension === '.ts' ? extractEmbeddedCodeFromTypeScript(file) : null; if (embedded !== null) { css.push(...embedded.stylesheets); diff --git a/runner/ratings/rating-types.ts b/runner/ratings/rating-types.ts index 2ce3f0d..fceb104 100644 --- a/runner/ratings/rating-types.ts +++ b/runner/ratings/rating-types.ts @@ -1,5 +1,5 @@ import z from 'zod'; -import { BuildResult } from '../workers/builder/builder-types.js'; +import {BuildResult} from '../workers/builder/builder-types.js'; import type { IndividualAssessment, LlmResponseFile, @@ -7,9 +7,9 @@ import type { SkippedIndividualAssessment, Usage, } from '../shared-interfaces.js'; -import { Environment } from '../configuration/environment.js'; -import { GenkitRunner } from '../codegen/genkit/genkit-runner.js'; -import { ServeTestingResult } from '../workers/serve-testing/worker-types.js'; +import {Environment} from '../configuration/environment.js'; +import {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; +import {ServeTestingResult} from '../workers/serve-testing/worker-types.js'; /** Possible types of ratings. */ export enum RatingKind { @@ -66,10 +66,8 @@ const perBuildRatingSchema = z repairAttempts: z.number(), axeRepairAttempts: z.number(), generatedFileCount: z.number(), - ratingsResult: z.record( - z.custom() - ), - }) + ratingsResult: z.record(z.custom()), + }), ) .returns(z.custom()), }) @@ -84,23 +82,19 @@ const perFileRatingSchema = z .args( z.string(), z.string().optional(), - z.record(z.custom()) + z.record(z.custom()), ) .returns(z.custom()), filter: z.union([ z - .custom((value) => typeof value === 'number') + .custom(value => typeof value === 'number') .describe('PerFileRatingContentType'), z.strictObject({ type: z - .custom( - (value) => typeof value === 'number' - ) + .custom(value => typeof value === 'number') .describe('PerFileRatingContentType'), - pattern: z.custom((data) => data instanceof RegExp).optional(), - pathPattern: z - .custom((data) => data instanceof RegExp) - .optional(), + pattern: z.custom(data => data instanceof RegExp).optional(), + pathPattern: z.custom(data => data instanceof RegExp).optional(), }), ]), }) @@ -176,14 +170,11 @@ export interface ExecutedLLMBasedRating { tokenUsage: Usage; details: { summary: string; - categories: { name: string; message: string }[]; + categories: {name: string; message: string}[]; }; } -export type RatingsResult = Record< - string, - IndividualAssessment | SkippedIndividualAssessment ->; +export type RatingsResult = Record; export interface LLMBasedRatingContext { environment: Environment; diff --git a/runner/ratings/stats.ts b/runner/ratings/stats.ts index 928ddf6..d278d87 100644 --- a/runner/ratings/stats.ts +++ b/runner/ratings/stats.ts @@ -1,8 +1,5 @@ -import { - BuildErrorType, - BuildResultStatus, -} from '../workers/builder/builder-types.js'; -import { UserFacingError } from '../utils/errors.js'; +import {BuildErrorType, BuildResultStatus} from '../workers/builder/builder-types.js'; +import {UserFacingError} from '../utils/errors.js'; import { AggregatedRunStats, AssessmentResult, @@ -12,10 +9,10 @@ import { /** Possible buckets that scores can be categorized into. */ const BUCKET_CONFIG = [ - { name: 'Excellent', min: 98, max: 100, id: 'excellent' }, - { name: 'Great', min: 85, max: 97, id: 'great' }, - { name: 'Good', min: 71, max: 84, id: 'good' }, - { name: 'Poor', min: 0, max: 70, id: 'poor' }, + {name: 'Excellent', min: 98, max: 100, id: 'excellent'}, + {name: 'Great', min: 85, max: 97, id: 'great'}, + {name: 'Good', min: 71, max: 84, id: 'good'}, + {name: 'Poor', min: 0, max: 70, id: 'poor'}, ]; /** @@ -24,9 +21,7 @@ const BUCKET_CONFIG = [ * @param assessments - An array of `AssessmentResult` objects. * @returns An object containing aggregated build and check statistics. */ -export function calculateBuildAndCheckStats( - assessments: AssessmentResult[] -): AggregatedRunStats { +export function calculateBuildAndCheckStats(assessments: AssessmentResult[]): AggregatedRunStats { let successfulInitialBuilds = 0; let successfulBuildsAfterRepair = 0; let failedBuilds = 0; @@ -38,11 +33,9 @@ export function calculateBuildAndCheckStats( appsWithoutErrors: number; } | undefined; - let securityStats: - | { appsWithErrors: number; appsWithoutErrors: number } - | undefined; + let securityStats: {appsWithErrors: number; appsWithoutErrors: number} | undefined; const errorDistribution: Partial> = {}; - const buckets: ScoreBucket[] = BUCKET_CONFIG.map((b) => ({ + const buckets: ScoreBucket[] = BUCKET_CONFIG.map(b => ({ name: b.name, nameWithLabels: `${b.name} (${b.min === b.max ? b.max : `${b.min}-${b.max}`}%)`, min: b.min, @@ -51,7 +44,7 @@ export function calculateBuildAndCheckStats( appsCount: 0, })); - assessments.forEach((result) => { + assessments.forEach(result => { if (result.finalAttempt.buildResult.status === BuildResultStatus.SUCCESS) { if (result.repairAttempts === 0) { successfulInitialBuilds++; @@ -62,13 +55,12 @@ export function calculateBuildAndCheckStats( failedBuilds++; if (result.finalAttempt.buildResult.errorType) { errorDistribution[result.finalAttempt.buildResult.errorType] = - (errorDistribution[result.finalAttempt.buildResult.errorType] || 0) + - 1; + (errorDistribution[result.finalAttempt.buildResult.errorType] || 0) + 1; } } if (result.finalAttempt.serveTestingResult?.runtimeErrors != undefined) { - runtimeStats ??= { appsWithErrors: 0, appsWithoutErrors: 0 }; + runtimeStats ??= {appsWithErrors: 0, appsWithoutErrors: 0}; if (result.finalAttempt.serveTestingResult.runtimeErrors.trim() != '') { runtimeStats.appsWithErrors++; } @@ -89,8 +81,8 @@ export function calculateBuildAndCheckStats( } } } - securityStats ??= { appsWithErrors: 0, appsWithoutErrors: 0 }; - const { numCspViolations, numTrustedTypesViolations } = ( + securityStats ??= {appsWithErrors: 0, appsWithoutErrors: 0}; + const {numCspViolations, numTrustedTypesViolations} = ( result.finalAttempt.serveTestingResult?.cspViolations || [] ).reduce( (acc, v) => { @@ -101,12 +93,11 @@ export function calculateBuildAndCheckStats( } return acc; }, - { numCspViolations: 0, numTrustedTypesViolations: 0 } + {numCspViolations: 0, numTrustedTypesViolations: 0}, ); const hasSafetyViolations = - (result.finalAttempt.buildResult.safetyWebReportJson?.[0]?.violations - ?.length ?? 0) > 0; + (result.finalAttempt.buildResult.safetyWebReportJson?.[0]?.violations?.length ?? 0) > 0; // TODO: Consider numTrustedTypesViolations once we update autoCsp and re-enable the rating. if (hasSafetyViolations || numCspViolations > 0) { securityStats.appsWithErrors++; @@ -115,16 +106,12 @@ export function calculateBuildAndCheckStats( } const scorePercentage = Math.floor( - (result.score.totalPoints / result.score.maxOverallPoints) * 100 - ); - const bucket = buckets.find( - (b) => scorePercentage >= b.min && scorePercentage <= b.max + (result.score.totalPoints / result.score.maxOverallPoints) * 100, ); + const bucket = buckets.find(b => scorePercentage >= b.min && scorePercentage <= b.max); if (!bucket) { - throw new UserFacingError( - `Score ${scorePercentage} did not fit into any bucket` - ); + throw new UserFacingError(`Score ${scorePercentage} did not fit into any bucket`); } bucket.appsCount++; @@ -135,19 +122,14 @@ export function calculateBuildAndCheckStats( successfulInitialBuilds, successfulBuildsAfterRepair, failedBuilds, - errorDistribution: - Object.keys(errorDistribution).length > 0 - ? errorDistribution - : undefined, + errorDistribution: Object.keys(errorDistribution).length > 0 ? errorDistribution : undefined, }, buckets, runtime: runtimeStats ? { appsWithErrors: runtimeStats.appsWithErrors, appsWithoutErrors: - successfulInitialBuilds + - successfulBuildsAfterRepair - - runtimeStats.appsWithErrors, + successfulInitialBuilds + successfulBuildsAfterRepair - runtimeStats.appsWithErrors, } : undefined, accessibility: accessibilityStats, diff --git a/runner/report-cli.ts b/runner/report-cli.ts index ed51fdd..0f5a855 100644 --- a/runner/report-cli.ts +++ b/runner/report-cli.ts @@ -1,9 +1,9 @@ -import { Arguments, Argv, CommandModule } from 'yargs'; -import { join, relative } from 'path'; -import { executeCommand } from './utils/exec.js'; -import { REPORTS_ROOT_DIR } from './configuration/constants.js'; -import { toProcessAbsolutePath } from './file-system-utils.js'; -import { formatTitleCard } from './reporting/format.js'; +import {Arguments, Argv, CommandModule} from 'yargs'; +import {join, relative} from 'path'; +import {executeCommand} from './utils/exec.js'; +import {REPORTS_ROOT_DIR} from './configuration/constants.js'; +import {toProcessAbsolutePath} from './file-system-utils.js'; +import {formatTitleCard} from './reporting/format.js'; export const ReportModule = { builder, @@ -54,9 +54,7 @@ async function handler(cliArgs: Arguments): Promise { }; if (cliArgs.reportsLoader) { - environmentVariables['CODEGEN_REPORTS_LOADER'] = toProcessAbsolutePath( - cliArgs.reportsLoader - ); + environmentVariables['CODEGEN_REPORTS_LOADER'] = toProcessAbsolutePath(cliArgs.reportsLoader); } await executeCommand( @@ -74,11 +72,11 @@ async function handler(cliArgs: Arguments): Promise { [ `View your reports at http://localhost:${cliArgs.port}`, `Reports are served from ${relative(process.cwd(), reportsDir)}`, - ].join('\n') - ) + ].join('\n'), + ), ); }, }, - } + }, ); } diff --git a/runner/reporting/ai-summarize.ts b/runner/reporting/ai-summarize.ts index a43c629..ce0c182 100644 --- a/runner/reporting/ai-summarize.ts +++ b/runner/reporting/ai-summarize.ts @@ -1,6 +1,6 @@ -import { marked } from 'marked'; -import { BuildResultStatus } from '../workers/builder/builder-types.js'; -import { GenkitRunner } from '../codegen/genkit/genkit-runner.js'; +import {marked} from 'marked'; +import {BuildResultStatus} from '../workers/builder/builder-types.js'; +import {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; import { AssessmentResult, IndividualAssessment, @@ -10,7 +10,7 @@ import { export async function summarizeReportWithAI( llm: GenkitRunner, abortSignal: AbortSignal, - assessments: AssessmentResult[] + assessments: AssessmentResult[], ) { const totalApps = assessments.length; const prompt = `\ @@ -63,47 +63,43 @@ Categorize the failures and provide a brief summary of the report. Keep it short function serializeReportForPrompt(assessments: AssessmentResult[]): string { return assessments .map( - (app) => + app => `\ Name: ${app.promptDef.name} Score: ${app.score.totalPoints}/${app.score.maxOverallPoints} Failed checks: ${JSON.stringify( app.score.categories - .flatMap((category) => category.assessments) + .flatMap(category => category.assessments) .filter( (a): a is IndividualAssessment => - a.state === IndividualAssessmentState.EXECUTED && - a.successPercentage < 1 + a.state === IndividualAssessmentState.EXECUTED && a.successPercentage < 1, ) - .map((c) => ({ + .map(c => ({ description: c.description, points: `${(c.successPercentage * 100).toFixed(2)}/100`, message: c.message, })), null, - 2 + 2, )} Build results: ${JSON.stringify( - app.attemptDetails.map((a) => ({ + app.attemptDetails.map(a => ({ buildResult: { message: a.buildResult.message, - status: - a.buildResult.status === BuildResultStatus.ERROR - ? 'Error' - : 'Success', + status: a.buildResult.status === BuildResultStatus.ERROR ? 'Error' : 'Success', }, attempt: a.attempt, })), null, - 2 + 2, )} Serve testing results: ${JSON.stringify( - app.attemptDetails.map((a) => ({ + app.attemptDetails.map(a => ({ serveTestingResult: { runtimeErrors: a.serveTestingResult?.runtimeErrors, }, - })) - )}` + })), + )}`, ) .join('\n------------\n'); } diff --git a/runner/reporting/format.ts b/runner/reporting/format.ts index 28ec7cf..8e9e821 100644 --- a/runner/reporting/format.ts +++ b/runner/reporting/format.ts @@ -48,7 +48,7 @@ export function formatTokenCount(count: number | undefined | null): string { */ export function formatAssessmentMessage( assessment: IndividualAssessment | SkippedIndividualAssessment, - message: string + message: string, ): string { if (assessment.state === IndividualAssessmentState.SKIPPED) { return chalk.gray(message); diff --git a/runner/reporting/migrations/v2_to_v3.ts b/runner/reporting/migrations/v2_to_v3.ts index 1e4a6fe..8a66235 100644 --- a/runner/reporting/migrations/v2_to_v3.ts +++ b/runner/reporting/migrations/v2_to_v3.ts @@ -24,7 +24,7 @@ export function convertV2ReportToV3Report(doc: any) { axeViolations: origBuildResult.axeViolations, }; - return { buildResult, serveTestingResult }; + return {buildResult, serveTestingResult}; }; for (const result of doc.results) { diff --git a/runner/reporting/report-local-disk.ts b/runner/reporting/report-local-disk.ts index e4621d5..1d3b803 100644 --- a/runner/reporting/report-local-disk.ts +++ b/runner/reporting/report-local-disk.ts @@ -1,7 +1,7 @@ -import { readFile } from 'node:fs/promises'; -import { dirname, join } from 'node:path'; -import { RunGroup, RunInfo } from '../shared-interfaces.js'; -import { glob } from 'tinyglobby'; +import {readFile} from 'node:fs/promises'; +import {dirname, join} from 'node:path'; +import {RunGroup, RunInfo} from '../shared-interfaces.js'; +import {glob} from 'tinyglobby'; /** Type describing a map from group report IDs to their runs. */ export type FetchedLocalReports = Map< @@ -13,9 +13,7 @@ export type FetchedLocalReports = Map< >; /** Fetches local report data from the given directory. */ -export async function fetchReportsFromDisk( - directory: string -): Promise { +export async function fetchReportsFromDisk(directory: string): Promise { const data: FetchedLocalReports = new Map(); const groupFiles = await glob('**/groups.json', { cwd: directory, @@ -39,8 +37,8 @@ export async function fetchReportsFromDisk( // were part of the same invocation. Add a unique suffix to the ID to // prevent further grouping. run.group = group.id = `${group.id}-l${index}`; - data.set(group.id, { group, run }); - }) + data.set(group.id, {group, run}); + }), ); return data; diff --git a/runner/reporting/report-logging.ts b/runner/reporting/report-logging.ts index 8552454..b0dd85f 100644 --- a/runner/reporting/report-logging.ts +++ b/runner/reporting/report-logging.ts @@ -1,18 +1,11 @@ -import { join } from 'path'; +import {join} from 'path'; import chalk from 'chalk'; import boxen from 'boxen'; -import { - IndividualAssessmentState, - RunInfo, - ScoreBucket, -} from '../shared-interfaces.js'; -import { - DEFAULT_AUTORATER_MODEL_NAME, - REPORTS_ROOT_DIR, -} from '../configuration/constants.js'; -import { calculateBuildAndCheckStats } from '../ratings/stats.js'; -import { safeWriteFile } from '../file-system-utils.js'; -import { BuildResultStatus } from '../workers/builder/builder-types.js'; +import {IndividualAssessmentState, RunInfo, ScoreBucket} from '../shared-interfaces.js'; +import {DEFAULT_AUTORATER_MODEL_NAME, REPORTS_ROOT_DIR} from '../configuration/constants.js'; +import {calculateBuildAndCheckStats} from '../ratings/stats.js'; +import {safeWriteFile} from '../file-system-utils.js'; +import {BuildResultStatus} from '../workers/builder/builder-types.js'; import { formatTokenCount, greenCheckmark, @@ -22,10 +15,10 @@ import { formatScore, formatTitleCard, } from './format.js'; -import { Environment } from '../configuration/environment.js'; -import { LlmRunner } from '../codegen/llm-runner.js'; -import { groupSimilarReports } from '../orchestration/grouping.js'; -import { LocalEnvironment } from '../configuration/environment-local.js'; +import {Environment} from '../configuration/environment.js'; +import {LlmRunner} from '../codegen/llm-runner.js'; +import {groupSimilarReports} from '../orchestration/grouping.js'; +import {LocalEnvironment} from '../configuration/environment-local.js'; /** * Generates a structured report on fs, based on the assessment run information. @@ -48,17 +41,11 @@ import { LocalEnvironment } from '../configuration/environment-local.js'; * @param id ID of the environment that was used for the eval. * @returns The original `runInfo` object, allowing for chaining. */ -export async function writeReportToDisk( - runInfo: RunInfo, - id: string -): Promise { +export async function writeReportToDisk(runInfo: RunInfo, id: string): Promise { // Sanitize report name: allow only a-z, A-Z, 0-9, and hyphens. Replace others with a hyphen. - const sanitizedReportName = runInfo.details.reportName.replace( - /[^a-zA-Z0-9-]/g, - '-' - ); + const sanitizedReportName = runInfo.details.reportName.replace(/[^a-zA-Z0-9-]/g, '-'); - const { results } = runInfo; + const {results} = runInfo; const reportBaseDir = join(REPORTS_ROOT_DIR, id, sanitizedReportName); // Write `summary.json` file, which contains **all** available info. @@ -84,10 +71,7 @@ export async function writeReportToDisk( // Write file with stats const statsJson = { - fileSize: attempt.outputFiles.reduce( - (total, current) => (total += current.code.length), - 0 - ), + fileSize: attempt.outputFiles.reduce((total, current) => (total += current.code.length), 0), buildResult: attempt.buildResult, serveTestingResult: attempt.serveTestingResult, }; @@ -95,17 +79,12 @@ export async function writeReportToDisk( await safeWriteFile(statsCodePath, printJson(statsJson)); await Promise.all( - result.outputFiles.map((file) => - safeWriteFile(join(attemptPath, file.filePath), file.code) - ) + result.outputFiles.map(file => safeWriteFile(join(attemptPath, file.filePath), file.code)), ); // Write build.log for failed builds if (attempt.buildResult.status === BuildResultStatus.ERROR) { - await safeWriteFile( - join(attemptPath, 'build.log'), - attempt.buildResult.message - ); + await safeWriteFile(join(attemptPath, 'build.log'), attempt.buildResult.message); } // Write screenshot to fs first, since we'll remove this info @@ -127,7 +106,7 @@ export async function writeReportToDisk( const reportFilePath = join(attemptPath, 'safety-web.json'); await safeWriteFile( reportFilePath, - JSON.stringify(attempt.buildResult.safetyWebReportJson, null, 2) + JSON.stringify(attempt.buildResult.safetyWebReportJson, null, 2), ); } @@ -139,7 +118,7 @@ export async function writeReportToDisk( const reportFilePath = join(attemptPath, 'csp-violations.json'); await safeWriteFile( reportFilePath, - JSON.stringify(attempt.serveTestingResult.cspViolations, null, 2) + JSON.stringify(attempt.serveTestingResult.cspViolations, null, 2), ); } } @@ -151,7 +130,7 @@ export async function writeReportToDisk( `${greenCheckmark()} Full report has been saved to the '${reportBaseDir}' directory.`, '🚀 Run `web-codegen-scorer report` to view the report in your browser!', '', - ].join('\n') + ].join('\n'), ); } @@ -165,15 +144,14 @@ export function logReportHeader( labels: string[]; startMcp?: boolean; autoraterModel?: string; - } + }, ): void { const titleCardText = [ 'Running a codegen evaluation with configuration:', '', ` - Environment: ${env.displayName}`, ` - Model: ${options.model}`, - options.autoraterModel && - options.autoraterModel !== DEFAULT_AUTORATER_MODEL_NAME + options.autoraterModel && options.autoraterModel !== DEFAULT_AUTORATER_MODEL_NAME ? ` - Autorater model: ${options.autoraterModel}` : null, ` - Runner: ${env instanceof LocalEnvironment ? env.llm.displayName : 'Remote'}`, @@ -186,58 +164,55 @@ export function logReportHeader( ` - Start time: ${new Date().toLocaleString()}`, ` - Number of prompts: ${promptsToProcess}`, ] - .filter((line) => line != null) + .filter(line => line != null) .join('\n'); console.log(formatTitleCard(titleCardText)); } export function logReportToConsole(runInfo: RunInfo): void { - const { details, results } = runInfo; - const { builds, buckets } = calculateBuildAndCheckStats(results); - const { usage } = details.summary; - const { successfulInitialBuilds, successfulBuildsAfterRepair } = builds; + const {details, results} = runInfo; + const {builds, buckets} = calculateBuildAndCheckStats(results); + const {usage} = details.summary; + const {successfulInitialBuilds, successfulBuildsAfterRepair} = builds; const totalResults = results.length || 1; // Avoid division by zero if results is empty results.forEach((result, index) => { console.log(` Prompt #${index}: ${chalk.bold(result.promptDef.name)}`); console.log(` Text: ${result.promptDef.prompt}`); - const { maxOverallPoints, totalPoints } = result.score; + const {maxOverallPoints, totalPoints} = result.score; const scorePercentage = (totalPoints / maxOverallPoints) * 100; const scoreMessage = `${Math.round(totalPoints)} / ${maxOverallPoints} points (${scorePercentage.toFixed(2)}%)`; console.log( - ` Code Quality Score: ${formatScore(totalPoints / maxOverallPoints, scoreMessage)}` + ` Code Quality Score: ${formatScore(totalPoints / maxOverallPoints, scoreMessage)}`, ); console.log(' Scoring Details'); - result.score.categories.forEach((category) => { - console.log( - ` ${category.name} (${category.points}/${category.maxPoints} points):` - ); + result.score.categories.forEach(category => { + console.log(` ${category.name} (${category.points}/${category.maxPoints} points):`); if (category.assessments.length === 0) { console.log(' No assessments'); } - category.assessments.forEach((assessment) => { + category.assessments.forEach(assessment => { let statusIcon: string; if (assessment.state === IndividualAssessmentState.SKIPPED) { statusIcon = '-'; } else { - statusIcon = - assessment.successPercentage === 1 - ? `${greenCheckmark()}` - : `${redX()}`; + statusIcon = assessment.successPercentage === 1 ? `${greenCheckmark()}` : `${redX()}`; } - const potentialMultilineAssessmentMessage = - assessment.message.replaceAll('\n', '\n '); + const potentialMultilineAssessmentMessage = assessment.message.replaceAll( + '\n', + '\n ', + ); const formattedMessage = formatAssessmentMessage( assessment, - potentialMultilineAssessmentMessage + potentialMultilineAssessmentMessage, ); console.log(` ${statusIcon} ${assessment.name}: ${formattedMessage}`); @@ -250,17 +225,14 @@ export function logReportToConsole(runInfo: RunInfo): void { } }); - const failedBuilds = - results.length - successfulInitialBuilds - successfulBuildsAfterRepair; + const failedBuilds = results.length - successfulInitialBuilds - successfulBuildsAfterRepair; const summaryLines = [ 'Run info:', ` - Environment: ${runInfo.details.summary.displayName}`, ` - Model: ${runInfo.details.summary.model}`, ` - Runner: ${runInfo.details.summary.runner?.displayName}`, - runInfo.details.labels?.length - ? ` - Labels: ${runInfo.details.labels.join(', ')}` - : null, + runInfo.details.labels?.length ? ` - Labels: ${runInfo.details.labels.join(', ')}` : null, ` - Framework: ${runInfo.details.summary.framework.clientSideFramework.displayName}`, ` - End time: ${new Date().toLocaleString()}`, ` - Total prompts processed: ${results.length}`, @@ -270,42 +242,42 @@ export function logReportToConsole(runInfo: RunInfo): void { 'Successful initial builds', successfulInitialBuilds, totalResults, - chalk.green + chalk.green, ), formatSummaryLine( 'Successful builds after repair', successfulBuildsAfterRepair, totalResults, - chalk.yellow + chalk.yellow, ), formatSummaryLine('Failed builds', failedBuilds, totalResults, chalk.red), '', 'Code quality stats:', - ...buckets.map((bucket) => + ...buckets.map(bucket => formatSummaryLine( bucket.nameWithLabels, bucket.appsCount, totalResults, - bucketToChalkFn(bucket) - ) + bucketToChalkFn(bucket), + ), ), '', 'Usage info:', ` - Input tokens: ${formatTokenCount(usage.inputTokens)}`, ` - Output tokens: ${formatTokenCount(usage.outputTokens)}`, ` - Total tokens: ${formatTokenCount(usage.totalTokens)}`, - ].filter((line) => line != null); + ].filter(line => line != null); console.log( boxen(summaryLines.join('\n'), { padding: 1, - margin: { top: 2 }, + margin: {top: 2}, width: Math.min(80, process.stdout.columns), borderColor: 'cyan', borderStyle: 'double', title: 'Assessment Summary', titleAlignment: 'center', - }) + }), ); } @@ -321,7 +293,7 @@ function formatSummaryLine( label: string, count: number, total: number, - colorFn: (value: string) => string + colorFn: (value: string) => string, ): string { const percentage = total > 0 ? (count / total) * 100 : 0; return ` - ${label}: ${colorFn(`${count} (${percentage.toFixed(1)}%)`)}`; diff --git a/runner/run-cli.ts b/runner/run-cli.ts index 8cddc26..73a44b6 100644 --- a/runner/run-cli.ts +++ b/runner/run-cli.ts @@ -1,26 +1,20 @@ -import { Arguments, Argv, CommandModule } from 'yargs'; +import {Arguments, Argv, CommandModule} from 'yargs'; import chalk from 'chalk'; import process from 'process'; -import { getEnvironmentByPath } from './configuration/environment-resolution.js'; -import { - BUILT_IN_ENVIRONMENTS, - LLM_OUTPUT_DIR, -} from './configuration/constants.js'; -import { UserFacingError } from './utils/errors.js'; -import { existsSync, rmSync } from 'fs'; -import { readFile, readdir } from 'fs/promises'; -import { join } from 'path'; -import { glob } from 'tinyglobby'; -import { LlmResponseFile } from './shared-interfaces.js'; -import { - setupProjectStructure, - writeResponseFiles, -} from './orchestration/file-system.js'; -import { serveApp } from './workers/serve-testing/serve-app.js'; -import { ProgressLogger, ProgressType } from './progress/progress-logger.js'; -import { formatTitleCard, redX } from './reporting/format.js'; -import { NoopProgressLogger } from './progress/noop-progress-logger.js'; -import { LocalEnvironment } from './configuration/environment-local.js'; +import {getEnvironmentByPath} from './configuration/environment-resolution.js'; +import {BUILT_IN_ENVIRONMENTS, LLM_OUTPUT_DIR} from './configuration/constants.js'; +import {UserFacingError} from './utils/errors.js'; +import {existsSync, rmSync} from 'fs'; +import {readFile, readdir} from 'fs/promises'; +import {join} from 'path'; +import {glob} from 'tinyglobby'; +import {LlmResponseFile} from './shared-interfaces.js'; +import {setupProjectStructure, writeResponseFiles} from './orchestration/file-system.js'; +import {serveApp} from './workers/serve-testing/serve-app.js'; +import {ProgressLogger, ProgressType} from './progress/progress-logger.js'; +import {formatTitleCard, redX} from './reporting/format.js'; +import {NoopProgressLogger} from './progress/noop-progress-logger.js'; +import {LocalEnvironment} from './configuration/environment-local.js'; export const RunModule = { builder, @@ -64,32 +58,26 @@ async function handler(options: Arguments): Promise { } async function runApp(options: Options) { - const { environment, rootPromptDef, files } = await resolveConfig(options); + const {environment, rootPromptDef, files} = await resolveConfig(options); const progress = new ErrorOnlyProgressLogger(); if (!(environment instanceof LocalEnvironment)) { - console.error( - `${redX()} Unable to run eval app locally for a remote environment.` - ); + console.error(`${redX()} Unable to run eval app locally for a remote environment.`); return; } console.log( - `Setting up the "${environment.displayName}" environment with the "${rootPromptDef.name}" prompt...` + `Setting up the "${environment.displayName}" environment with the "${rootPromptDef.name}" prompt...`, ); - const { directory, cleanup } = await setupProjectStructure( - environment, - rootPromptDef, - progress - ); + const {directory, cleanup} = await setupProjectStructure(environment, rootPromptDef, progress); - const processExitPromise = new Promise((resolve) => { + const processExitPromise = new Promise(resolve => { const done = () => { () => { try { // Note: we don't use `cleanup` here, because the call needs to be synchronous. - rmSync(directory, { recursive: true }); + rmSync(directory, {recursive: true}); } catch {} resolve(); }; @@ -108,11 +96,11 @@ async function runApp(options: Options) { rootPromptDef, directory, new NoopProgressLogger(), - async (url) => { + async url => { console.log(); console.log(formatTitleCard(`🎉 App is up and running at ${url}`)); await processExitPromise; - } + }, ); } finally { await cleanup(); @@ -127,24 +115,24 @@ async function resolveConfig(options: Options) { ' - Pass a path to an environment config file using the `--env` flag.', ' - Pass `--env=angular-example` or `--env=solid-example` to use one of our built-in example environments.', ' - Pass `--help` to see all available options.', - ].join('\n') + ].join('\n'), ); } else if (!options.prompt) { throw new UserFacingError( '`--prompt` flag has not been specified. ' + - 'You have to pass a prompt name through the `--prompt` flag.' + 'You have to pass a prompt name through the `--prompt` flag.', ); } const environment = await getEnvironmentByPath( BUILT_IN_ENVIRONMENTS.get(options.environment) || options.environment, - 'genkit' + 'genkit', ); const environmentDir = join(LLM_OUTPUT_DIR, environment.id); if (!existsSync(environmentDir)) { throw new UserFacingError( - `Could not find any LLM output for environment "${environment.displayName}" under "${environmentDir}"` + `Could not find any LLM output for environment "${environment.displayName}" under "${environmentDir}"`, ); } @@ -154,41 +142,37 @@ async function resolveConfig(options: Options) { throw new UserFacingError( `There is no local LLM output for environment "${options.prompt}".\n` + `The following prompts have local data:\n` + - prompts.map((p) => ` - ${p}`).join('\n') + prompts.map(p => ` - ${p}`).join('\n'), ); } - const rootPromptDef = environment.executablePrompts.find( - (p) => p.name === options.prompt - ); + const rootPromptDef = environment.executablePrompts.find(p => p.name === options.prompt); if (!rootPromptDef) { throw new UserFacingError( `Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` + `The following prompts are available:\n` + - environment.executablePrompts.map((p) => ` - ${p.name}`).join('\n') + environment.executablePrompts.map(p => ` - ${p.name}`).join('\n'), ); } const promptDir = join(environmentDir, options.prompt); - const filePaths = await glob('**/*', { cwd: promptDir }); + const filePaths = await glob('**/*', {cwd: promptDir}); const files: LlmResponseFile[] = await Promise.all( - filePaths.map(async (path) => { + filePaths.map(async path => { return { filePath: path, code: await readFile(join(promptDir, path), 'utf8'), }; - }) + }), ); - return { environment, rootPromptDef, files }; + return {environment, rootPromptDef, files}; } async function getPossiblePrompts(environmentDir: string): Promise { - const entities = await readdir(environmentDir, { withFileTypes: true }); - return entities - .filter((entity) => entity.isDirectory()) - .map((entity) => entity.name); + const entities = await readdir(environmentDir, {withFileTypes: true}); + return entities.filter(entity => entity.isDirectory()).map(entity => entity.name); } class ErrorOnlyProgressLogger implements ProgressLogger { diff --git a/runner/shared-interfaces.ts b/runner/shared-interfaces.ts index ede7222..597d807 100644 --- a/runner/shared-interfaces.ts +++ b/runner/shared-interfaces.ts @@ -1,12 +1,9 @@ -import type { - BuildErrorType, - BuildResult, -} from './workers/builder/builder-types.js'; -import type { REPORT_VERSION } from './configuration/constants.js'; -import type { UserJourneysResult } from './orchestration/user-journeys.js'; -import type { AutoRateResult } from './ratings/autoraters/auto-rate-shared.js'; -import type { Rating, RatingCategory } from './ratings/rating-types.js'; -import type { ServeTestingResult } from './workers/serve-testing/worker-types.js'; +import type {BuildErrorType, BuildResult} from './workers/builder/builder-types.js'; +import type {REPORT_VERSION} from './configuration/constants.js'; +import type {UserJourneysResult} from './orchestration/user-journeys.js'; +import type {AutoRateResult} from './ratings/autoraters/auto-rate-shared.js'; +import type {Rating, RatingCategory} from './ratings/rating-types.js'; +import type {ServeTestingResult} from './workers/serve-testing/worker-types.js'; /** * Represents a single prompt definition and extra metadata for it. @@ -277,7 +274,7 @@ export interface AggregatedRunStats { appsWithoutErrorsAfterRepair: number; appsWithoutErrors: number; }; - security?: { appsWithErrors: number; appsWithoutErrors: number }; + security?: {appsWithErrors: number; appsWithoutErrors: number}; } export interface CompletionStats { @@ -361,7 +358,7 @@ export interface RunDetails { /** Information about configured MCP servers, if any. */ mcp?: { /** MCP servers that were configured. */ - servers: { name: string; command: string; args: string[] }[]; + servers: {name: string; command: string; args: string[]}[]; /** Logs produced by all of the servers. */ logs: string; diff --git a/runner/testing/browser-agent/index.ts b/runner/testing/browser-agent/index.ts index a08528e..6e3563d 100644 --- a/runner/testing/browser-agent/index.ts +++ b/runner/testing/browser-agent/index.ts @@ -1,6 +1,6 @@ -import { spawn } from 'child_process'; -import { executeCommand } from '../../utils/exec.js'; -import { UserFacingError } from '../../utils/errors.js'; +import {spawn} from 'child_process'; +import {executeCommand} from '../../utils/exec.js'; +import {UserFacingError} from '../../utils/errors.js'; let pendingDepInstall: Promise | null = null; @@ -8,7 +8,7 @@ export async function runPythonAgentScript( taskFile: string, hostUrl: string, abortSignal: AbortSignal, - opts?: { printLogOutput?: boolean } + opts?: {printLogOutput?: boolean}, ): Promise { const processDir = import.meta.dirname; @@ -38,18 +38,18 @@ export async function runPythonAgentScript( }); const output: Buffer[] = []; - child.stdio[3]!.on('data', (data) => { + child.stdio[3]!.on('data', data => { output.push(data); }); - child.stdout!.on('data', (data) => { + child.stdout!.on('data', data => { if (opts?.printLogOutput) { process.stderr.write(data); } }); const stderrOutput: Buffer[] = []; - child.stderr!.on('data', (data) => { + child.stderr!.on('data', data => { if (opts?.printLogOutput) { process.stderr.write(data); } @@ -57,18 +57,16 @@ export async function runPythonAgentScript( }); return await new Promise((resolve, reject) => { - child.on('close', (code) => { + child.on('close', code => { if (code !== 0) { const stderr = Buffer.concat(stderrOutput).toString('utf8'); - reject( - new Error(`Process exited with code ${code}.\n\nStderr:\n${stderr}`) - ); + reject(new Error(`Process exited with code ${code}.\n\nStderr:\n${stderr}`)); return; } const outputFd3 = Buffer.concat(output).toString('utf8'); resolve(outputFd3); }); - child.on('error', (err) => { + child.on('error', err => { reject(`Error when spawning: ${err}`); }); }); @@ -77,13 +75,10 @@ export async function runPythonAgentScript( async function installPythonDependencies(processDir: string): Promise { try { await executeCommand('uv pip install browser-use', processDir); - await executeCommand( - 'uvx playwright install chromium --with-deps', - processDir - ); + await executeCommand('uvx playwright install chromium --with-deps', processDir); } catch (e) { throw new UserFacingError( - `Failed to install user journey agent dependencies in ${processDir}\n` + e + `Failed to install user journey agent dependencies in ${processDir}\n` + e, ); } } diff --git a/runner/testing/browser-agent/models.ts b/runner/testing/browser-agent/models.ts index 2741b44..14372c6 100644 --- a/runner/testing/browser-agent/models.ts +++ b/runner/testing/browser-agent/models.ts @@ -2,7 +2,7 @@ * IMPORTANT: KEEP these interfaces in sync WITH `main.py` and `models.py`. */ -import { UserJourneyDefinition } from '../../orchestration/user-journeys.js'; +import {UserJourneyDefinition} from '../../orchestration/user-journeys.js'; export interface BrowserAgentTaskInput { userJourneys: UserJourneyDefinition[]; diff --git a/runner/utils/binary-exists.ts b/runner/utils/binary-exists.ts index 93033b6..314f29a 100644 --- a/runner/utils/binary-exists.ts +++ b/runner/utils/binary-exists.ts @@ -1,8 +1,8 @@ -import { exec } from 'child_process'; +import {exec} from 'child_process'; /** Determines if a specific binary exists on the local machine. */ export function binaryExists(name: string): Promise { - return new Promise((resolve) => { - exec(`which ${name}`, (error) => resolve(!error)); + return new Promise(resolve => { + exec(`which ${name}`, error => resolve(!error)); }); } diff --git a/runner/utils/exec.ts b/runner/utils/exec.ts index e774529..7869a73 100644 --- a/runner/utils/exec.ts +++ b/runner/utils/exec.ts @@ -1,4 +1,4 @@ -import { exec } from 'node:child_process'; +import {exec} from 'node:child_process'; /** * Runs a command in a specific directory. @@ -13,9 +13,9 @@ export function executeCommand( opts: { forwardStderrToParent?: boolean; forwardStdoutToParent?: boolean; - notifyWhenMatchingStdout?: { notifyFn: () => void; pattern: RegExp }; + notifyWhenMatchingStdout?: {notifyFn: () => void; pattern: RegExp}; abortSignal?: AbortSignal; - } = {} + } = {}, ): Promise { return new Promise((resolve, reject) => { const proc = exec(command, { @@ -31,24 +31,21 @@ export function executeCommand( let stderr = ''; let notifyWhenMatchingStdout = opts.notifyWhenMatchingStdout; - proc.on('error', (err) => { + proc.on('error', err => { reject(err); }); - proc.stdout!.on('data', (c) => { + proc.stdout!.on('data', c => { stdout += c; if (opts.forwardStdoutToParent) { process.stdout.write(c); } - if ( - notifyWhenMatchingStdout && - notifyWhenMatchingStdout.pattern.test(stdout) - ) { + if (notifyWhenMatchingStdout && notifyWhenMatchingStdout.pattern.test(stdout)) { notifyWhenMatchingStdout.notifyFn(); notifyWhenMatchingStdout = undefined; } }); - proc.stderr!.on('data', (c) => { + proc.stderr!.on('data', c => { stderr += c; if (opts.forwardStderrToParent) { process.stderr.write(c); diff --git a/runner/utils/kill-gracefully.ts b/runner/utils/kill-gracefully.ts index b922c11..5249685 100644 --- a/runner/utils/kill-gracefully.ts +++ b/runner/utils/kill-gracefully.ts @@ -1,9 +1,9 @@ -import { ChildProcess } from 'child_process'; +import {ChildProcess} from 'child_process'; import treeKill from 'tree-kill'; function treeKillPromise(pid: number, signal: string): Promise { return new Promise((resolve, reject) => { - treeKill(pid, signal, (err) => { + treeKill(pid, signal, err => { if (err !== undefined) { reject(err); } else { @@ -15,7 +15,7 @@ function treeKillPromise(pid: number, signal: string): Promise { export function killChildProcessGracefully( child: ChildProcess, - timeoutInMs = 1000 * 10 // 10s + timeoutInMs = 1000 * 10, // 10s ): Promise { return new Promise(async (resolve, reject) => { // Process already exited. @@ -39,23 +39,15 @@ export function killChildProcessGracefully( try { await treeKillPromise(pid, 'SIGTERM'); } catch (e) { - console.error( - `Could not send "SIGTERM" for killing process. Trying "SIGKILL".` - ); + console.error(`Could not send "SIGTERM" for killing process. Trying "SIGKILL".`); } // Start a timeout for the SIGKILL fallback - const sigkillTimeoutId = setTimeout( - () => treeKill(pid, 'SIGKILL'), - timeoutInMs - ); + const sigkillTimeoutId = setTimeout(() => treeKill(pid, 'SIGKILL'), timeoutInMs); // Start another timeout to reject the promise if the child process never fires `exit` for some reasons. const rejectTimeoutId = setTimeout( - () => - reject( - new Error('Child process did not exit gracefully within the timeout.') - ), - timeoutInMs * 2 + () => reject(new Error('Child process did not exit gracefully within the timeout.')), + timeoutInMs * 2, ); }); } diff --git a/runner/utils/timeout.ts b/runner/utils/timeout.ts index 4a880bf..bff6fff 100644 --- a/runner/utils/timeout.ts +++ b/runner/utils/timeout.ts @@ -6,7 +6,7 @@ export class TimeoutError extends Error {} export async function callWithTimeout( description: string, fn: (signal: AbortSignal) => Promise, - timeoutInMin: number + timeoutInMin: number, ): Promise { const abortController = new AbortController(); let timeoutID: NodeJS.Timeout | null = null; @@ -18,7 +18,7 @@ export async function callWithTimeout( // Trigger abort signal to cleanup/kill e.g. processes behind a timeout. abortController.abort(); }, - 1000 * 60 * timeoutInMin + 1000 * 60 * timeoutInMin, ); }); diff --git a/runner/workers/builder/builder-types.ts b/runner/workers/builder/builder-types.ts index 77a2fe0..3ede3f4 100644 --- a/runner/workers/builder/builder-types.ts +++ b/runner/workers/builder/builder-types.ts @@ -1,4 +1,4 @@ -import { PackageSummary } from '@safety-web/types'; +import {PackageSummary} from '@safety-web/types'; /** * Represents the message structure used for communication between diff --git a/runner/workers/builder/worker.ts b/runner/workers/builder/worker.ts index 78abf4d..c62a1b7 100644 --- a/runner/workers/builder/worker.ts +++ b/runner/workers/builder/worker.ts @@ -1,7 +1,7 @@ -import { delimiter, join } from 'path'; -import { redX } from '../../reporting/format.js'; -import { executeCommand } from '../../utils/exec.js'; -import { callWithTimeout } from '../../utils/timeout.js'; +import {delimiter, join} from 'path'; +import {redX} from '../../reporting/format.js'; +import {executeCommand} from '../../utils/exec.js'; +import {callWithTimeout} from '../../utils/timeout.js'; import { BuildErrorType, BuildResultStatus, @@ -9,26 +9,26 @@ import { BuildWorkerResponseMessage, } from './builder-types.js'; -import { run as runSafetyWeb } from '@safety-web/runner'; -import { PackageSummary } from '@safety-web/types'; +import {run as runSafetyWeb} from '@safety-web/runner'; +import {PackageSummary} from '@safety-web/types'; process.on('message', async (message: BuildWorkerMessage) => { - const { appName, directory, buildCommand } = message; + const {appName, directory, buildCommand} = message; try { // Run the build command inside the temporary project directory await callWithTimeout( `Building ${appName}`, - (abortSignal) => + abortSignal => executeCommand( buildCommand, directory, { PATH: `${process.env['PATH']}${delimiter}${join(directory, 'node_modules/.bin')}`, }, - { abortSignal } + {abortSignal}, ), - 4 // 4min. This is a safety boundary. Lots of parallelism can slow-down. + 4, // 4min. This is a safety boundary. Lots of parallelism can slow-down. ); } catch (error: any) { const cleanErrorMessage = cleanupBuildMessage(error.message); @@ -54,20 +54,13 @@ process.on('message', async (message: BuildWorkerMessage) => { // Run the safety-web runner on the temporary project directory const safetyWebSummaries = await callWithTimeout( `SAFETY WEB ${appName}`, - (_abortSignal) => - runSafetyWeb( - directory, - /* processPrivatePackages */ true, - /* useDefaultTSConfig */ true - ), - 4 + _abortSignal => + runSafetyWeb(directory, /* processPrivatePackages */ true, /* useDefaultTSConfig */ true), + 4, ); safetyWebReportJson = new Array(...safetyWebSummaries); } catch (error: any) { - console.error( - `${redX()} Could not create safety web report for \`${appName}\``, - error - ); + console.error(`${redX()} Could not create safety web report for \`${appName}\``, error); } process.send!({ @@ -112,10 +105,7 @@ function classifyBuildError(errorMessage: string): BuildErrorType { if (/\[ERROR\]\s*NG\d+/.test(errorMessage)) { return BuildErrorType.ANGULAR_DIAGNOSTIC; } - if ( - /\[ERROR\]\s*TS\d+/.test(errorMessage) || - errorMessage.includes('Type error') - ) { + if (/\[ERROR\]\s*TS\d+/.test(errorMessage) || errorMessage.includes('Type error')) { return BuildErrorType.TYPESCRIPT_ERROR; } return BuildErrorType.OTHER; diff --git a/runner/workers/serve-testing/auto-csp.ts b/runner/workers/serve-testing/auto-csp.ts index efe8772..2c44f36 100644 --- a/runner/workers/serve-testing/auto-csp.ts +++ b/runner/workers/serve-testing/auto-csp.ts @@ -1,7 +1,7 @@ -import puppeteer, { Protocol } from 'puppeteer'; +import puppeteer, {Protocol} from 'puppeteer'; import fetch from 'node-fetch'; -import { StrictCsp } from 'strict-csp'; -import { CspViolation } from './auto-csp-types.js'; +import {StrictCsp} from 'strict-csp'; +import {CspViolation} from './auto-csp-types.js'; /** * Stores metadata about a script parsed by the browser's debugger. @@ -30,35 +30,29 @@ export class AutoCsp { const client = await page.createCDPSession(); await client.send('Debugger.enable'); - client.on( - 'Debugger.scriptParsed', - async (event: Protocol.Debugger.ScriptParsedEvent) => { - if (!event.url) { - return; - } - try { - const { scriptSource } = await client.send( - 'Debugger.getScriptSource', - { - scriptId: event.scriptId, - } - ); - - const info: ScriptInfo = { - url: event.url, - source: scriptSource, - startLine: event.startLine, - endLine: event.endLine, - }; - - const existing = this.scriptInfosByUrl.get(event.url) ?? []; - existing.push(info); - this.scriptInfosByUrl.set(event.url, existing); - } catch (e) { - // This can happen for certain browser-internal scripts. We can ignore them. - } + client.on('Debugger.scriptParsed', async (event: Protocol.Debugger.ScriptParsedEvent) => { + if (!event.url) { + return; } - ); + try { + const {scriptSource} = await client.send('Debugger.getScriptSource', { + scriptId: event.scriptId, + }); + + const info: ScriptInfo = { + url: event.url, + source: scriptSource, + startLine: event.startLine, + endLine: event.endLine, + }; + + const existing = this.scriptInfosByUrl.get(event.url) ?? []; + existing.push(info); + this.scriptInfosByUrl.set(event.url, existing); + } catch (e) { + // This can happen for certain browser-internal scripts. We can ignore them. + } + }); } /** @@ -107,7 +101,7 @@ export class AutoCsp { console.error('Could not parse CSP report:', e); } // Respond to the request so the browser doesn't hang - await request.respond({ status: 204 }); + await request.respond({status: 204}); } private addCodeSnippetToReport(report: CspViolation): void { @@ -126,9 +120,7 @@ export class AutoCsp { } // Find the specific script block that contains the violation line. - const script = scriptInfos.find( - (s) => lineNumber >= s.startLine && lineNumber <= s.endLine - ); + const script = scriptInfos.find(s => lineNumber >= s.startLine && lineNumber <= s.endLine); if (script) { const lines = script.source.split('\n'); @@ -148,9 +140,7 @@ export class AutoCsp { } } - private async handleNavigation( - request: puppeteer.HTTPRequest - ): Promise { + private async handleNavigation(request: puppeteer.HTTPRequest): Promise { try { const response = await fetch(request.url(), { headers: request.headers(), diff --git a/runner/workers/serve-testing/browser-agent.ts b/runner/workers/serve-testing/browser-agent.ts index 1f2d150..fde413a 100644 --- a/runner/workers/serve-testing/browser-agent.ts +++ b/runner/workers/serve-testing/browser-agent.ts @@ -1,44 +1,36 @@ -import { mkdtemp, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'os'; -import { join } from 'path'; -import { runPythonAgentScript } from '../../testing/browser-agent/index.js'; -import { - AgentOutput, - BrowserAgentTaskInput, -} from '../../testing/browser-agent/models.js'; -import { callWithTimeout } from '../../utils/timeout.js'; -import { binaryExists } from '../../utils/binary-exists.js'; -import { UserFacingError } from '../../utils/errors.js'; -import { ServeTestingProgressLogFn } from './worker-types.js'; +import {mkdtemp, writeFile} from 'node:fs/promises'; +import {tmpdir} from 'os'; +import {join} from 'path'; +import {runPythonAgentScript} from '../../testing/browser-agent/index.js'; +import {AgentOutput, BrowserAgentTaskInput} from '../../testing/browser-agent/models.js'; +import {callWithTimeout} from '../../utils/timeout.js'; +import {binaryExists} from '../../utils/binary-exists.js'; +import {UserFacingError} from '../../utils/errors.js'; +import {ServeTestingProgressLogFn} from './worker-types.js'; export async function runBrowserAgentUserJourneyTests( appName: string, hostUrl: string, agentTask: BrowserAgentTaskInput, - progressLog: ServeTestingProgressLogFn + progressLog: ServeTestingProgressLogFn, ): Promise { - const tmpDir = await mkdtemp( - join(tmpdir(), 'browser-agent-user-journey-task-') - ); + const tmpDir = await mkdtemp(join(tmpdir(), 'browser-agent-user-journey-task-')); const taskJsonFile = join(tmpDir, 'task.json'); await writeFile(taskJsonFile, JSON.stringify(agentTask)); - const [hasPython, hasUv] = await Promise.all([ - binaryExists('python3'), - binaryExists('uv'), - ]); + const [hasPython, hasUv] = await Promise.all([binaryExists('python3'), binaryExists('uv')]); const docsLink = 'https://docs.browser-use.com/quickstart'; progressLog('eval', 'Starting User Journey testing'); if (!hasPython) { throw new UserFacingError( - `Cannot run user journey testing, because Python is not installed. See ${docsLink}.` + `Cannot run user journey testing, because Python is not installed. See ${docsLink}.`, ); } if (!hasUv) { throw new UserFacingError( - `Cannot run user journey testing, because \`uv\` is not installed. See ${docsLink}.` + `Cannot run user journey testing, because \`uv\` is not installed. See ${docsLink}.`, ); } @@ -47,22 +39,18 @@ export async function runBrowserAgentUserJourneyTests( const resultStdout = await callWithTimeout( `User journey testing for ${appName}`, - (abortSignal) => + abortSignal => runPythonAgentScript(taskJsonFile, hostUrl, abortSignal, { printLogOutput: false, }), - 4 // 4min + 4, // 4min ); const deltaTime = Math.ceil((performance.now() - startTime) / 1000); progressLog('eval', 'Completed user journey testing', `(${deltaTime}s)`); return JSON.parse(resultStdout.trim()) as AgentOutput; } catch (e) { - progressLog( - 'error', - 'Error while running user journey browser tests', - e + '' - ); + progressLog('error', 'Error while running user journey browser tests', e + ''); return null; } } diff --git a/runner/workers/serve-testing/puppeteer.ts b/runner/workers/serve-testing/puppeteer.ts index dfb4aa8..1e1e8c3 100644 --- a/runner/workers/serve-testing/puppeteer.ts +++ b/runner/workers/serve-testing/puppeteer.ts @@ -1,10 +1,10 @@ -import { AxePuppeteer } from '@axe-core/puppeteer'; -import { Result } from 'axe-core'; +import {AxePuppeteer} from '@axe-core/puppeteer'; +import {Result} from 'axe-core'; import puppeteer from 'puppeteer'; -import { callWithTimeout } from '../../utils/timeout.js'; -import { AutoCsp } from './auto-csp.js'; -import { CspViolation } from './auto-csp-types.js'; -import { ServeTestingProgressLogFn } from './worker-types.js'; +import {callWithTimeout} from '../../utils/timeout.js'; +import {AutoCsp} from './auto-csp.js'; +import {CspViolation} from './auto-csp-types.js'; +import {ServeTestingProgressLogFn} from './worker-types.js'; /** * Uses Puppeteer to take a screenshot of the main page, perform Axe testing, @@ -17,7 +17,7 @@ export async function runAppInPuppeteer( takeScreenshots: boolean, includeAxeTesting: boolean, progressLog: ServeTestingProgressLogFn, - enableAutoCsp: boolean + enableAutoCsp: boolean, ) { const runtimeErrors: string[] = []; @@ -38,18 +38,15 @@ export async function runAppInPuppeteer( }); const page = await browser.newPage(); - page.on('console', async (message) => { + page.on('console', async message => { if (message.type() !== 'error') return; if (!message.text().includes('JSHandle@error')) { - progressLog( - 'error', - `${message.type().substring(0, 3).toUpperCase()} ${message.text()}` - ); + progressLog('error', `${message.type().substring(0, 3).toUpperCase()} ${message.text()}`); return; } const messages = await Promise.all( - message.args().map(async (arg) => { + message.args().map(async arg => { const [message, stack] = await Promise.all([ arg.getProperty('message'), arg.getProperty('stack'), @@ -63,24 +60,24 @@ export async function runAppInPuppeteer( result += (result.length ? '\n\n' : '') + stack; } return result; - }) + }), ); runtimeErrors.push(messages.filter(Boolean).join('\n')); }); - page.on('pageerror', (error) => { + page.on('pageerror', error => { progressLog('error', 'Page error', error.message); runtimeErrors.push(error.toString()); }); - await page.setViewport({ width: 1280, height: 720 }); + await page.setViewport({width: 1280, height: 720}); // Set up auto-CSP handling if enabled for the environment. if (enableAutoCsp) { const autoCsp = new AutoCsp(); await autoCsp.connectToDevTools(page); await page.setRequestInterception(true); - page.on('request', async (request) => { + page.on('request', async request => { if (request.isInterceptResolutionHandled()) { return; } @@ -123,11 +120,7 @@ export async function runAppInPuppeteer( progressLog('success', `No Axe violations found.`); } } catch (axeError: any) { - progressLog( - 'error', - 'Could not perform Axe accessibility test', - axeError.message - ); + progressLog('error', 'Could not perform Axe accessibility test', axeError.message); } } @@ -142,7 +135,7 @@ export async function runAppInPuppeteer( fullPage: true, encoding: 'base64', }), - 1 // 1 minute + 1, // 1 minute ); progressLog('success', 'Screenshot captured and encoded'); } @@ -157,5 +150,5 @@ export async function runAppInPuppeteer( progressLog('error', 'Could not take screenshot', details); } - return { screenshotBase64Data, runtimeErrors, axeViolations, cspViolations }; + return {screenshotBase64Data, runtimeErrors, axeViolations, cspViolations}; } diff --git a/runner/workers/serve-testing/serve-app.ts b/runner/workers/serve-testing/serve-app.ts index bfebba9..990af3f 100644 --- a/runner/workers/serve-testing/serve-app.ts +++ b/runner/workers/serve-testing/serve-app.ts @@ -1,25 +1,25 @@ -import { ChildProcess, exec } from 'child_process'; -import { killChildProcessGracefully } from '../../utils/kill-gracefully.js'; -import { cleanupBuildMessage } from '../builder/worker.js'; -import { ProgressLogger } from '../../progress/progress-logger.js'; -import { RootPromptDefinition } from '../../shared-interfaces.js'; +import {ChildProcess, exec} from 'child_process'; +import {killChildProcessGracefully} from '../../utils/kill-gracefully.js'; +import {cleanupBuildMessage} from '../builder/worker.js'; +import {ProgressLogger} from '../../progress/progress-logger.js'; +import {RootPromptDefinition} from '../../shared-interfaces.js'; export async function serveApp( serveCommand: string, rootPromptDef: RootPromptDefinition, appDirectoryPath: string, progress: ProgressLogger, - logicWhileServing: (serveUrl: string) => Promise + logicWhileServing: (serveUrl: string) => Promise, ): Promise { let serveProcess: ChildProcess | null = null; try { - serveProcess = exec(serveCommand, { cwd: appDirectoryPath }); + serveProcess = exec(serveCommand, {cwd: appDirectoryPath}); progress.log( rootPromptDef, 'eval', 'Launching app inside a browser', - `(PID: ${serveProcess.pid})` + `(PID: ${serveProcess.pid})`, ); const actualPort = await new Promise((resolvePort, rejectPort) => { @@ -27,8 +27,8 @@ export async function serveApp( const timeoutId = setTimeout(() => { rejectPort( new Error( - `Serving process for \`${rootPromptDef.name}\` timed out waiting for port information after ${serveStartTimeout / 1000}s.` - ) + `Serving process for \`${rootPromptDef.name}\` timed out waiting for port information after ${serveStartTimeout / 1000}s.`, + ), ); }, serveStartTimeout); @@ -47,21 +47,17 @@ export async function serveApp( if (match && match[1]) { clearTimeout(timeoutId); const port = parseInt(match[1], 10); - progress.log( - rootPromptDef, - 'eval', - `App is up and running on port ${port}` - ); + progress.log(rootPromptDef, 'eval', `App is up and running on port ${port}`); portResolved = true; resolvePort(port); } } }; - serveProcess!.stdout?.on('data', (data) => processOutput(data)); - serveProcess!.stderr?.on('data', (data) => processOutput(data)); + serveProcess!.stdout?.on('data', data => processOutput(data)); + serveProcess!.stderr?.on('data', data => processOutput(data)); - serveProcess!.on('error', (err) => { + serveProcess!.on('error', err => { clearTimeout(timeoutId); progress.log(rootPromptDef, 'error', 'Failed to launch app', err + ''); rejectPort(err); @@ -75,20 +71,15 @@ export async function serveApp( if (code !== 0 && code !== null) { rejectPort( new Error( - `Launch process for \`${rootPromptDef.name}\` exited prematurely with code ${code}, signal ${signal}. Output: ${outputBuffer.slice(-500)}` - ) + `Launch process for \`${rootPromptDef.name}\` exited prematurely with code ${code}, signal ${signal}. Output: ${outputBuffer.slice(-500)}`, + ), ); - } else if ( - code === null && - signal && - signal !== 'SIGTERM' && - signal !== 'SIGINT' - ) { + } else if (code === null && signal && signal !== 'SIGTERM' && signal !== 'SIGINT') { // SIGTERM/SIGINT is expected for our kill rejectPort( new Error( - `Launch process for \`${rootPromptDef.name}\` was killed by unexpected signal ${signal} before port resolution. Output: ${outputBuffer.slice(-500)}` - ) + `Launch process for \`${rootPromptDef.name}\` was killed by unexpected signal ${signal} before port resolution. Output: ${outputBuffer.slice(-500)}`, + ), ); } }); @@ -103,7 +94,7 @@ export async function serveApp( rootPromptDef, 'eval', 'Terminating browser process for app', - `(PID: ${serveProcess.pid})` + `(PID: ${serveProcess.pid})`, ); await killChildProcessGracefully(serveProcess); serveProcess = null; diff --git a/runner/workers/serve-testing/worker-types.ts b/runner/workers/serve-testing/worker-types.ts index f1f3534..fe2db9c 100644 --- a/runner/workers/serve-testing/worker-types.ts +++ b/runner/workers/serve-testing/worker-types.ts @@ -1,10 +1,7 @@ -import { ProgressType } from '../../progress/progress-logger.js'; -import { - AgentOutput, - BrowserAgentTaskInput, -} from '../../testing/browser-agent/models.js'; -import { Result } from 'axe-core'; -import { CspViolation } from './auto-csp-types.js'; +import {ProgressType} from '../../progress/progress-logger.js'; +import {AgentOutput, BrowserAgentTaskInput} from '../../testing/browser-agent/models.js'; +import {Result} from 'axe-core'; +import {CspViolation} from './auto-csp-types.js'; /** * Represents the message structure used for communication between @@ -58,7 +55,7 @@ export interface ServeTestingProgressLogMessage { export type ServeTestingProgressLogFn = ( state: ProgressType, message: string, - details?: string + details?: string, ) => void; export type ServeTestingWorkerResponseMessage = diff --git a/runner/workers/serve-testing/worker.ts b/runner/workers/serve-testing/worker.ts index 64a6fd0..eb5c5f5 100644 --- a/runner/workers/serve-testing/worker.ts +++ b/runner/workers/serve-testing/worker.ts @@ -1,9 +1,9 @@ -import { ProgressType } from '../../progress/progress-logger.js'; -import { AgentOutput } from '../../testing/browser-agent/models.js'; -import { callWithTimeout } from '../../utils/timeout.js'; -import { CspViolation } from '../serve-testing/auto-csp-types.js'; -import { runBrowserAgentUserJourneyTests } from '../serve-testing/browser-agent.js'; -import { runAppInPuppeteer } from '../serve-testing/puppeteer.js'; +import {ProgressType} from '../../progress/progress-logger.js'; +import {AgentOutput} from '../../testing/browser-agent/models.js'; +import {callWithTimeout} from '../../utils/timeout.js'; +import {CspViolation} from '../serve-testing/auto-csp-types.js'; +import {runBrowserAgentUserJourneyTests} from '../serve-testing/browser-agent.js'; +import {runAppInPuppeteer} from '../serve-testing/puppeteer.js'; import { ServeTestingProgressLogMessage, ServeTestingResult, @@ -21,14 +21,10 @@ process.on('message', async (message: ServeTestingWorkerMessage) => { userJourneyAgentTaskInput, } = message; const runtimeErrors: string[] = []; - const progressLog = ( - state: ProgressType, - message: string, - details?: string - ) => { + const progressLog = (state: ProgressType, message: string, details?: string) => { process.send!({ type: 'log', - payload: { state, message, details }, + payload: {state, message, details}, } satisfies ServeTestingProgressLogMessage); }; @@ -48,9 +44,9 @@ process.on('message', async (message: ServeTestingWorkerMessage) => { !!takeScreenshots, !!includeAxeTesting, progressLog, - !!enableAutoCsp + !!enableAutoCsp, ), - 4 // 4min + 4, // 4min ); screenshotBase64Data = puppeteerResult.screenshotBase64Data; @@ -64,7 +60,7 @@ process.on('message', async (message: ServeTestingWorkerMessage) => { appName, serveUrl, userJourneyAgentTaskInput, - progressLog + progressLog, ); }