Skip to content

Commit 085cfc0

Browse files
committed
Fix compile error
1 parent 55bfb4e commit 085cfc0

File tree

3 files changed: 38 additions (+38) and 8 deletions (-8)

3 files changed: 38 additions (+38) and 8 deletions (-8)

specification/0.9/eval/src/basic_schema_matcher.ts

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,19 @@ import { SchemaMatcher, ValidationResult } from "./schema_matcher";
1919
export class BasicSchemaMatcher extends SchemaMatcher {
2020
constructor(
2121
public propertyPath: string,
22-
public propertyValue?: any,
22+
public propertyValue?: any
2323
) {
2424
super();
2525
}
2626

27+
get description(): string {
28+
return `Basic match for property '${this.propertyPath}'${
29+
this.propertyValue !== undefined
30+
? ` with value '${JSON.stringify(this.propertyValue)}'`
31+
: ""
32+
}`;
33+
}
34+
2735
validate(schema: any): ValidationResult {
2836
if (!schema) {
2937
const result: ValidationResult = {
@@ -54,7 +62,7 @@ export class BasicSchemaMatcher extends SchemaMatcher {
5462
const error = `Property '${
5563
this.propertyPath
5664
}' has value '${JSON.stringify(
57-
actualValue,
65+
actualValue
5866
)}', but expected '${JSON.stringify(this.propertyValue)}'.`;
5967
return { success: false, error };
6068
}
@@ -69,6 +77,10 @@ export class AnySchemaMatcher extends SchemaMatcher {
6977
super();
7078
}
7179

80+
get description(): string {
81+
return `Any of: [${this.matchers.map((m) => m.description).join(", ")}]`;
82+
}
83+
7284
validate(schema: any): ValidationResult {
7385
const errors: string[] = [];
7486
for (const matcher of this.matchers) {
@@ -93,6 +105,10 @@ export class AllSchemaMatcher extends SchemaMatcher {
93105
super();
94106
}
95107

108+
get description(): string {
109+
return `All of: [${this.matchers.map((m) => m.description).join(", ")}]`;
110+
}
111+
96112
validate(schema: any): ValidationResult {
97113
for (const matcher of this.matchers) {
98114
const result = matcher.validate(schema);

specification/0.9/eval/src/index.ts

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,8 @@ function generateSummary(
110110

111111
let totalModelFailedRuns = 0;
112112

113-
for (const promptName in promptsInModel) {
113+
const sortedPromptNames = Object.keys(promptsInModel).sort();
114+
for (const promptName of sortedPromptNames) {
114115
const runs = promptsInModel[promptName];
115116
const totalRuns = runs.length;
116117
const failedRuns = runs.filter(
@@ -132,7 +133,15 @@ function generateSummary(
132133
}
133134

134135
const totalRunsForModel = resultsByModel[modelName].length;
135-
summary += `\n\n**Total failed runs:** ${totalModelFailedRuns} / ${totalRunsForModel}`;
136+
const successPercentage =
137+
totalRunsForModel === 0
138+
? "0.0"
139+
: (
140+
((totalRunsForModel - totalModelFailedRuns) / totalRunsForModel) *
141+
100.0
142+
).toFixed(1);
143+
144+
summary += `\n\n**Total failed runs:** ${totalModelFailedRuns} / ${totalRunsForModel} (${successPercentage}% success)`;
136145
}
137146

138147
summary += "\n\n---\n\n## Overall Summary\n";
@@ -150,7 +159,13 @@ function generateSummary(
150159
].join(", ");
151160

152161
summary += `\n- **Total tool failures:** ${totalToolErrorRuns} / ${totalRuns}`;
153-
summary += `\n- **Number of runs with any failure (tool error or validation):** ${totalRunsWithAnyFailure} / ${totalRuns}`;
162+
const successPercentage =
163+
totalRuns === 0
164+
? "0.0"
165+
: (((totalRuns - totalRunsWithAnyFailure) / totalRuns) * 100.0).toFixed(
166+
1
167+
);
168+
summary += `\n- **Number of runs with any failure (tool error or validation):** ${totalRunsWithAnyFailure} / ${totalRuns} (${successPercentage}% success)`;
154169
const latencies = results.map((r) => r.latency).sort((a, b) => a - b);
155170
const totalLatency = latencies.reduce((acc, l) => acc + l, 0);
156171
const meanLatency = (totalLatency / totalRuns).toFixed(0);

specification/0.9/eval/src/prompts.ts

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -723,7 +723,6 @@ Each activity in the inner lists should be a 'Row' containing a 'CheckBox' (to m
723723
promptText: `Create a job application form. It should have 'TextField's for "Name", "Email", "Phone", "Resume URL". A 'MultipleChoice' for "Years of Experience" (options: "0-1", "2-5", "5+"). A 'Button' "Submit Application". Create these as static components, not data bound.`,
724724
matchers: [
725725
new MessageTypeMatcher("updateComponents"),
726-
new SurfaceUpdateSchemaMatcher("Text", "text", "Job Application"),
727726
new SurfaceUpdateSchemaMatcher("TextField", "label", "Name"),
728727
new SurfaceUpdateSchemaMatcher("TextField", "label", "Email"),
729728
new SurfaceUpdateSchemaMatcher("TextField", "label", "Resume URL"),
@@ -734,8 +733,8 @@ Each activity in the inner lists should be a 'Row' containing a 'CheckBox' (to m
734733
name: "courseSyllabus",
735734
description: "A course syllabus outline.",
736735
promptText: `First, generate a 'createSurface' message with surfaceId 'main'. Then, generate a 'updateComponents' message for a course syllabus. 'Text' (h1) "Introduction to Computer Science". 'List' of modules.
737-
- Module 1: 'Card' with 'Text' "Algorithms" and 'List' ("Sorting", "Searching").
738-
- Module 2: 'Card' with 'Text' "Data Structures" and 'List' ("Arrays", "Linked Lists").`,
736+
- For module 1, a 'Card' with 'Text' "Algorithms" and 'List' ("Sorting", "Searching").
737+
- For module 2, a 'Card' with 'Text' "Data Structures" and 'List' ("Arrays", "Linked Lists").`,
739738
matchers: [
740739
new MessageTypeMatcher("updateComponents"),
741740
new SurfaceUpdateSchemaMatcher(

0 commit comments

Comments (0)