Skip to content

Commit 55bfb4e

Browse files
committed
Improve eval performance and debugging
1 parent 73f60fd commit 55bfb4e

File tree

7 files changed

+182
-157
lines changed

7 files changed

+182
-157
lines changed

specification/0.9/eval/src/flows.ts

Lines changed: 11 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -62,52 +62,20 @@ export const componentGeneratorFlow = ai.defineFlow(
6262
.map((s: any) => JSON.stringify(s, null, 2))
6363
.join("\n\n");
6464

65-
const fullPrompt = `You are an AI assistant. Based on the following request, generate a JSON object that conforms to the provided JSON Schemas. The output MUST be ONLY the JSON object enclosed in a markdown code block.
66-
67-
DO NOT include any other text before or after the markdown code block.
68-
69-
Example Output:
70-
\`\`\`json
71-
{
72-
"updateComponents": {
73-
"surfaceId": "contact_form_1",
74-
"components": [
75-
{
76-
"id": "root",
77-
"props": {
78-
"component": "Column",
79-
"children": [
80-
"first_name_label",
81-
"first_name_field"
82-
]
83-
}
84-
},
85-
{
86-
"id": "first_name_label",
87-
"props": {
88-
"component": "Text",
89-
"text": "First Name"
90-
}
91-
},
92-
{
93-
"id": "first_name_field",
94-
"props": {
95-
"component": "TextField",
96-
"label": "First Name",
97-
"text": { "path": "/contact/firstName" },
98-
"textFieldType": "shortText"
99-
}
100-
}
101-
]
102-
}
103-
}
104-
\`\`\`
65+
const fullPrompt = `You are an AI assistant. Based on the following request, generate a stream of JSON messages that conform to the provided JSON Schemas.
66+
The output MUST be a series of JSON objects, each enclosed in a markdown code block (or a single block with multiple objects).
10567
106-
Request:
107-
${prompt}
68+
Standard Instructions:
69+
1. Always start by generating a 'createSurface' message with surfaceId 'main'.
70+
2. Then, generate an 'updateComponents' message with surfaceId 'main' containing the requested UI.
71+
3. Ensure all component children are referenced by ID (using the 'children' or 'child' property with IDs), NOT nested inline as objects.
72+
4. If the request involves data binding, you may also generate 'updateDataModel' messages.
10873
109-
JSON Schemas:
74+
Schemas:
11075
${schemaDefs}
76+
77+
Request:
78+
${prompt}
11179
`;
11280
const estimatedInputTokens = Math.ceil(fullPrompt.length / 2.5);
11381
await rateLimiter.acquirePermit(

specification/0.9/eval/src/message_type_matcher.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import { SchemaMatcher, ValidationResult } from "./schema_matcher";
2020
* A concrete matcher that verifies the top-level message type.
2121
*/
2222
export class MessageTypeMatcher extends SchemaMatcher {
23-
constructor(private messageType: string) {
23+
constructor(public messageType: string) {
2424
super();
2525
}
2626

@@ -45,6 +45,6 @@ export class MessageTypeMatcher extends SchemaMatcher {
4545
}
4646

4747
get description(): string {
48-
return `Expected top-level message type to be '${this.messageType}'`;
48+
return `Message type '${this.messageType}'`;
4949
}
5050
}

specification/0.9/eval/src/prompts.ts

Lines changed: 116 additions & 99 deletions
Large diffs are not rendered by default.

specification/0.9/eval/src/schema_matcher.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,14 @@ export interface ValidationResult {
2020
}
2121

2222
export abstract class SchemaMatcher {
23+
/**
24+
* A human-readable description of what this matcher is looking for.
25+
*/
26+
abstract get description(): string;
27+
28+
/**
29+
* Validates that the given schema satisfies the matcher's criteria.
30+
* @param schema The schema to validate (e.g. a message object).
31+
*/
2332
abstract validate(schema: any): ValidationResult;
2433
}

specification/0.9/eval/src/surface_update_schema_matcher.ts

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,21 @@ export class SurfaceUpdateSchemaMatcher extends SchemaMatcher {
3131
super();
3232
}
3333

34+
get description(): string {
35+
let desc = `Component '${this.componentType}'`;
36+
if (this.propertyName) {
37+
desc += ` with property '${this.propertyName}'`;
38+
if (this.propertyValue !== undefined) {
39+
if (typeof this.propertyValue === "function") {
40+
desc += ` matching predicate`;
41+
} else {
42+
desc += ` matching ${JSON.stringify(this.propertyValue)}`;
43+
}
44+
}
45+
}
46+
return desc;
47+
}
48+
3449
private getComponentById(components: any[], id: string): any | undefined {
3550
return components.find((c: any) => c.id === id);
3651
}
@@ -128,7 +143,12 @@ export class SurfaceUpdateSchemaMatcher extends SchemaMatcher {
128143
: s1 === s2;
129144
};
130145

131-
// 1. Direct Primitive Match (Shorthand)
146+
// Predicate Function Match
147+
if (typeof expectedValue === "function") {
148+
return expectedValue(actualValue);
149+
}
150+
151+
// Direct Primitive Match (Shorthand)
132152
if (typeof actualValue === "string" && typeof expectedValue === "string") {
133153
return compareStrings(actualValue, expectedValue);
134154
}
@@ -142,7 +162,7 @@ export class SurfaceUpdateSchemaMatcher extends SchemaMatcher {
142162
return actualValue === expectedValue;
143163
}
144164

145-
// 2. Object with Path (Should not match a literal expected value usually, unless expectedValue is the path object)
165+
// Object with Path (Should not match a literal expected value usually, unless expectedValue is the path object)
146166
if (
147167
typeof actualValue === "object" &&
148168
!Array.isArray(actualValue) &&
@@ -152,7 +172,7 @@ export class SurfaceUpdateSchemaMatcher extends SchemaMatcher {
152172
return false;
153173
}
154174

155-
// 3. Array Match (e.g. MultipleChoice options)
175+
// Array Match (e.g. MultipleChoice options)
156176
if (Array.isArray(actualValue)) {
157177
for (const item of actualValue) {
158178
// Direct match in array

specification/0.9/eval/src/validator.ts

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import path from "path";
2020
import { SurfaceUpdateSchemaMatcher } from "./surface_update_schema_matcher";
2121
import { SchemaMatcher } from "./schema_matcher";
2222

23-
const ajv = new Ajv({ strict: false });
23+
const ajv = new Ajv({ strict: false, verbose: true });
2424

2525
const schemaDir = path.resolve(process.cwd(), "../");
2626
const serverToClientSchema = JSON.parse(
@@ -58,7 +58,16 @@ export function validateSchema(
5858
if (!valid) {
5959
if (validate.errors) {
6060
validate.errors.forEach((err) => {
61-
errors.push(`AJV: ${err.instancePath} ${err.message}`);
61+
let errorMsg = `AJV [Message ${messages.indexOf(message)}]: ${
62+
err.instancePath
63+
} ${err.message}`;
64+
if (err.params) {
65+
errorMsg += ` | Params: ${JSON.stringify(err.params)}`;
66+
}
67+
if (err.data !== undefined) {
68+
errorMsg += ` | Data: ${JSON.stringify(err.data)}`;
69+
}
70+
errors.push(errorMsg);
6271
});
6372
}
6473
}
@@ -97,7 +106,7 @@ export function validateSchema(
97106
// No message in the stream satisfied this matcher.
98107
// SchemaMatcher exposes a human-readable description, so report
99108
// which expectation failed rather than a generic message:
100-
errors.push(`Matcher failed to match any message in the stream.`);
109+
errors.push(`Matcher failed: ${matcher.description}`);
101110
}
102111
}
103112
}
@@ -239,7 +248,7 @@ function validateComponent(
239248
for (const prop of props) {
240249
if (properties[prop] === undefined) {
241250
errors.push(
242-
`Component '${id}' of type '${componentType}' is missing required property '${prop}'.`
251+
`Component ${JSON.stringify(id)} of type '${componentType}' is missing required property '${prop}'.`
243252
);
244253
}
245254
}
@@ -249,7 +258,7 @@ function validateComponent(
249258
for (const id of ids) {
250259
if (id && !allIds.has(id)) {
251260
errors.push(
252-
`Component '${id}' references non-existent component ID '${id}'.`
261+
`Component ${JSON.stringify(id)} references non-existent component ID.`
253262
);
254263
}
255264
}

specification/0.9/json/standard_catalog_definition.json

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -199,14 +199,15 @@
199199
},
200200
"distribution": {
201201
"type": "string",
202-
"description": "Defines the arrangement of children along the main axis (horizontally). This corresponds to the CSS 'justify-content' property.",
202+
"description": "Defines the arrangement of children along the main axis (horizontally). This corresponds to the CSS 'justify-content' property, but uses camelCase values (e.g., 'spaceBetween').",
203203
"enum": [
204204
"center",
205205
"end",
206206
"spaceAround",
207207
"spaceBetween",
208208
"spaceEvenly",
209-
"start"
209+
"start",
210+
"stretch"
210211
]
211212
},
212213
"alignment": {
@@ -228,14 +229,15 @@
228229
},
229230
"distribution": {
230231
"type": "string",
231-
"description": "Defines the arrangement of children along the main axis (vertically). This corresponds to the CSS 'justify-content' property.",
232+
"description": "Defines the arrangement of children along the main axis (vertically). This corresponds to the CSS 'justify-content' property, but uses camelCase values (e.g., 'spaceBetween').",
232233
"enum": [
233234
"start",
234235
"center",
235236
"end",
236237
"spaceBetween",
237238
"spaceAround",
238-
"spaceEvenly"
239+
"spaceEvenly",
240+
"stretch"
239241
]
240242
},
241243
"alignment": {
@@ -433,7 +435,7 @@
433435
"component": { "const": "DateTimeInput" },
434436
"value": {
435437
"$ref": "common_types.json#/$defs/stringOrPath",
436-
"description": "The selected date and/or time value."
438+
"description": "The selected date and/or time value. If not yet set, initialize with an empty string."
437439
},
438440
"enableDate": {
439441
"type": "boolean",

0 commit comments

Comments
 (0)