Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions runner/orchestration/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,10 @@ export async function generateCodeAndAssess(options: {
appConcurrencyQueue.add(
async () => {
const evalID = await env.gateway.initializeEval();
let results: AssessmentResult[] | undefined;

try {
return await callWithTimeout(
results = await callWithTimeout(
`Evaluation of ${rootPromptDef.name}`,
async abortSignal =>
startEvaluationTask(
Expand All @@ -183,6 +184,7 @@ export async function generateCodeAndAssess(options: {
// 10min max per app evaluation. We just want to make sure it never gets stuck.
10,
);
return results;
} catch (e: unknown) {
failedPrompts.push({
promptName: rootPromptDef.name,
Expand All @@ -198,8 +200,7 @@ export async function generateCodeAndAssess(options: {
progress.log(rootPromptDef, 'error', 'Failed to evaluate code', details);
return [] satisfies AssessmentResult[];
} finally {
progress.log(rootPromptDef, 'done', 'Done');

progress.evalFinished(rootPromptDef, results || []);
await env.gateway.finalizeEval(evalID);
}
},
Expand Down
57 changes: 39 additions & 18 deletions runner/progress/dynamic-progress-logger.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {MultiBar, SingleBar, Presets} from 'cli-progress';
import chalk from 'chalk';
import {RootPromptDefinition} from '../shared-interfaces.js';
import {AssessmentResult, RootPromptDefinition} from '../shared-interfaces.js';
import {ProgressLogger, ProgressType, progressTypeToIcon} from './progress-logger.js';
import {redX} from '../reporting/format.js';

Expand All @@ -13,6 +13,8 @@ export class DynamicProgressLogger implements ProgressLogger {
private pendingBars = new Map<RootPromptDefinition, SingleBar>();
private spinnerFrames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
private currentSpinnerFrame = 0;
private completedEvals = 0;
private totalScore = 0;
private spinnerInterval: ReturnType<typeof setInterval> | undefined;
private errors: {
prompt: RootPromptDefinition;
Expand Down Expand Up @@ -46,10 +48,17 @@ export class DynamicProgressLogger implements ProgressLogger {
);

// Bar that tracks how many prompts are completed in total.
this.totalBar = this.wrapper.create(total, 0, undefined, {
format: '{bar} {spinner} {value}/{total} prompts completed',
barsize: PREFIX_WIDTH,
});
this.totalBar = this.wrapper.create(
total,
0,
{
additionalInfo: '',
},
{
format: '{bar} {spinner} {value}/{total} prompts completed{additionalInfo}',
barsize: PREFIX_WIDTH,
},
);

// Interval to update the spinner.
this.spinnerInterval = setInterval(() => {
Expand All @@ -74,6 +83,7 @@ export class DynamicProgressLogger implements ProgressLogger {
this.wrapper?.stop();
this.pendingBars.clear();
this.wrapper = this.totalBar = this.spinnerInterval = undefined;
this.completedEvals = this.totalScore = 0;

for (const error of this.errors) {
let message = `${redX()} [${error.prompt.name}] ${error.message}`;
Expand All @@ -91,17 +101,6 @@ export class DynamicProgressLogger implements ProgressLogger {

let bar = this.pendingBars.get(prompt);

// Drop the bar from the screen if it's complete.
if (type === 'done') {
this.pendingBars.delete(prompt);

if (bar) {
this.totalBar.increment();
this.wrapper.remove(bar);
}
return;
}

// Capture errors for static printing once the dynamic progress is hidden.
if (type === 'error') {
this.errors.push({prompt, message, details});
Expand All @@ -117,14 +116,36 @@ export class DynamicProgressLogger implements ProgressLogger {
if (bar) {
bar.update(0, payload);
} else {
const bar = this.wrapper.create(1, 0, payload);
bar = this.wrapper.create(1, 0, payload);
this.pendingBars.set(prompt, bar);
}
}

/**
 * Marks the evaluation of a prompt as finished: folds its results into the
 * running average score, advances the total progress bar and removes the
 * prompt's own bar from the screen.
 * @param prompt Prompt whose evaluation has finished.
 * @param results Assessment results produced for the prompt. May be empty,
 *   in which case the total bar still advances but the average is unchanged.
 */
evalFinished(prompt: RootPromptDefinition, results: AssessmentResult[]): void {
  const bar = this.pendingBars.get(prompt);
  this.pendingBars.delete(prompt);

  for (const result of results) {
    const {totalPoints, maxOverallPoints} = result.score;
    const percentage = (totalPoints / maxOverallPoints) * 100;

    // A `maxOverallPoints` of zero (or a missing value) produces NaN/Infinity.
    // Adding that to the running total would poison the average for every
    // subsequent update, so such results are left out of the average.
    if (!Number.isFinite(percentage)) {
      continue;
    }

    this.completedEvals++;
    this.totalScore += percentage;
  }

  if (this.completedEvals > 0) {
    const average = Math.round(this.totalScore / this.completedEvals);
    this.totalBar?.increment(1, {
      additionalInfo: `, ${average}% score on average`,
    });
  } else {
    // No scored results yet, so advance the bar without touching the payload.
    this.totalBar?.increment();
  }

  // Drop the bar from the screen if it's complete.
  if (bar) {
    this.wrapper?.remove(bar);
  }
}

private getColorFunction(type: ProgressType): (value: string) => string {
switch (type) {
case 'done':
case 'success':
case 'serve-testing':
case 'build':
Expand Down
1 change: 1 addition & 0 deletions runner/progress/noop-progress-logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ export class NoopProgressLogger implements ProgressLogger {
initialize(): void {}
finalize(): void {}
log(): void {}
evalFinished(): void {}
}
20 changes: 9 additions & 11 deletions runner/progress/progress-logger.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
import {greenCheckmark, redX} from '../reporting/format.js';
import {RootPromptDefinition} from '../shared-interfaces.js';
import {AssessmentResult, RootPromptDefinition} from '../shared-interfaces.js';

/** Possible progress event types. */
export type ProgressType =
| 'codegen'
| 'build'
| 'serve-testing'
| 'success'
| 'error'
| 'eval'
| 'done';
export type ProgressType = 'codegen' | 'build' | 'serve-testing' | 'success' | 'error' | 'eval';

/** Maps a ProgressType to an icon that can represent it. */
export function progressTypeToIcon(type: ProgressType): string {
Expand All @@ -27,8 +20,6 @@ export function progressTypeToIcon(type: ProgressType): string {
return redX();
case 'eval':
return '🔎';
case 'done':
return '🏁';
}
}

Expand All @@ -43,6 +34,13 @@ export interface ProgressLogger {
/** Stops the logging process. */
finalize(): void;

/**
* Logs when an individual eval has finished.
* @param prompt Prompt associated with the event.
* @param results Assessment results for the prompt.
*/
evalFinished(prompt: RootPromptDefinition, results: AssessmentResult[]): void;

/**
* Logs a progress event to the logger.
* @param prompt Prompt associated with the event.
Expand Down
12 changes: 5 additions & 7 deletions runner/progress/text-progress-logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,11 @@ export class TextProgressLogger implements ProgressLogger {

/**
 * Prints one line to the console for a progress event, in the form
 * `[<prompt name>] <icon> <message> <details>`.
 * @param prompt Prompt the event belongs to; only its `name` is read here.
 * @param type Kind of progress event; selects the icon via `progressTypeToIcon`.
 * @param message Text printed after the icon.
 * @param details Optional extra text appended after the message.
 */
log(prompt: RootPromptDefinition, type: ProgressType, message: string, details?: string): void {
const icon = progressTypeToIcon(type);

if (type === 'done') {
// It's handy to know how many apps are done when one completes.
// The counter is pre-incremented so the suffix includes the prompt that just finished.
const suffix = `(${++this.done}/${this.total})`;
details = details ? `${details} ${suffix}` : suffix;
}

// `trim()` removes the trailing space left behind when there are no details.
console.log(`[${prompt.name}] ${icon} ${message} ${details || ''}`.trim());
}

/**
 * Prints a completion line for the given prompt together with the running
 * count of finished prompts out of the total.
 * @param prompt Prompt whose evaluation has finished.
 */
evalFinished(prompt: RootPromptDefinition): void {
  // Count this prompt first so the printed tally includes it.
  this.done += 1;
  const line = `[${prompt.name}] 🏁 Done (${this.done}/${this.total})`;
  console.log(line.trim());
}
}
1 change: 1 addition & 0 deletions runner/run-cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ async function getPossiblePrompts(environmentDir: string): Promise<string[]> {
class ErrorOnlyProgressLogger implements ProgressLogger {
initialize(): void {}
finalize(): void {}
evalFinished(): void {}

log(_: unknown, type: ProgressType, message: string, details?: string) {
if (type === 'error') {
Expand Down
Loading