Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions report-app/src/app/shared/ai-assistant/ai-assistant.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
<div class="title-row">
<h2>AI Assistant</h2>
</div>
</div>
<div class="header-actions">
<div class="model-selector-container">
<select [(ngModel)]="selectedModel">
@for (model of models(); track model) {
<option [value]="model">{{ model }}</option>
}
</select>
</div>
</div>
<div class="header-actions">
<button class="expand-button" (click)="toggleExpanded()">
<span class="material-symbols-outlined">
@if (isExpanded()) {
Expand Down
16 changes: 4 additions & 12 deletions report-app/src/app/shared/ai-assistant/ai-assistant.scss
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,10 @@
color: var(--text-secondary);

select {
background-color: var(--background-color);
color: var(--text-primary);
border: 1px solid var(--border-color);
border-radius: 6px;
font-size: 13px;
padding: 2px 6px;
cursor: pointer;

&:focus {
outline: none;
border-color: var(--accent-blue);
}
padding-left: 6px;
width: 100%;
font-size: 0.8rem;
height: 1.5rem;
}
}

Expand Down
5 changes: 4 additions & 1 deletion report-app/src/app/shared/ai-assistant/ai-assistant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ export class AiAssistant {

protected readonly aiConfigState = httpResource<AIConfigState>(() => '/api/ai-config-state');
protected readonly models = computed(() => this.aiConfigState.value()?.configuredModels ?? []);
protected selectedModel = linkedSignal(() => this.models()[0]);
protected selectedModel = linkedSignal(() => {
  // Prefer Gemini 2.5 Flash whenever it is among the configured models
  // (it's fast enough for chat + can reason well); otherwise fall back to
  // the first configured model, which is `undefined` while the list is empty.
  const available = this.models();
  const preferred = available.find(name => name === 'gemini-2.5-flash');
  return preferred ?? available[0];
});

protected toggleExpanded(): void {
this.isExpanded.set(!this.isExpanded());
Expand Down
1 change: 0 additions & 1 deletion runner/orchestration/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import {
} from '../configuration/constants.js';
import {Environment} from '../configuration/environment.js';
import {rateGeneratedCode} from '../ratings/rate-code.js';
import {chatWithReportAI} from '../reporting/report-ai-chat.js';
import {redX} from '../reporting/format.js';
import {
AssessmentResult,
Expand Down
2 changes: 1 addition & 1 deletion runner/ratings/stats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {
} from '../shared-interfaces.js';

/** Possible buckets that scores can be categorized into. */
const BUCKET_CONFIG = [
export const BUCKET_CONFIG = [
{name: 'Excellent', min: 98, max: 100, id: 'excellent'},
{name: 'Great', min: 85, max: 97, id: 'great'},
{name: 'Good', min: 71, max: 84, id: 'good'},
Expand Down
18 changes: 16 additions & 2 deletions runner/reporting/report-ai-chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import {
IndividualAssessmentState,
} from '../shared-interfaces.js';
import {BuildResultStatus} from '../workers/builder/builder-types.js';
import {BUCKET_CONFIG} from '../ratings/stats.js';
import {POINTS_FOR_CATEGORIES} from '../ratings/rating-types.js';

export const reportLlmEvalsToolContext = `## What is a report?
A report consists of many apps that were LLM generated. You will have information
Expand All @@ -15,6 +17,17 @@ about checks that failed for this LLM generated app.
Note that there may be multiple attempts for an app. E.g. an initial build may fail and
another attempt might have repaired the build failure. The last attempt reflects the final
state of the app. E.g. whether it does build, or if there are runtime errors.

## Scoring mechanism
Apps are rated based on their scores in the following buckets:
${BUCKET_CONFIG.map(b => `* ${b.name}: ${b.min}-${b.max}`).join('\n')}

The overall score of an app is determined based on score reductions.
There are three pillars: ${Object.keys(POINTS_FOR_CATEGORIES).join(', ')}
Pillars are a split up of a 100% perfect score, allowing for individual ratings
to be less impactful than others. The pillars are distributed as follows:
${Object.entries(POINTS_FOR_CATEGORIES).map(e => `* ${e[0]}: ${e[1]} points.`).join('\n')}
Within pillars, the available score can be reduced by individual ratings.
`;

const defaultAiChatPrompt = `Strictly follow the instructions here.
Expand Down Expand Up @@ -86,7 +99,7 @@ export function serializeReportForPrompt(assessments: AssessmentResult[]): strin
`
Name: ${app.promptDef.name}
Score: ${app.score.totalPoints}/${app.score.maxOverallPoints}
Failed checks: ${JSON.stringify(
Failed checks/ratings: ${JSON.stringify(
app.score.categories
.flatMap(category => category.assessments)
.filter(
Expand All @@ -95,7 +108,8 @@ Failed checks: ${JSON.stringify(
)
.map(c => ({
description: c.description,
points: `${(c.successPercentage * 100).toFixed(2)}/100`,
category: c.category,
scoreReduction: c.scoreReduction,
message: c.message,
})),
null,
Expand Down
Loading