Skip to content

Commit de46a7b

Browse files
authored
refactor(core): aiAssert uses the same implementation as aiBoolean (#992)
1 parent c0ae07b commit de46a7b

File tree

10 files changed

+87
-39
lines changed

10 files changed

+87
-39
lines changed

packages/core/src/ai-model/action-executor.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ export class Executor {
6363
}
6464
}
6565

66-
async flush(): Promise<any> {
66+
async flush(): Promise<{ output: any; thought?: string } | undefined> {
6767
if (this.status === 'init' && this.tasks.length > 0) {
6868
console.warn(
6969
'illegal state for executor, status is init but tasks are not empty',
@@ -178,7 +178,11 @@ export class Executor {
178178
if (this.tasks.length) {
179179
// return the last output
180180
const outputIndex = Math.min(taskIndex, this.tasks.length - 1);
181-
return this.tasks[outputIndex].output;
181+
const { thought, output } = this.tasks[outputIndex];
182+
return {
183+
thought,
184+
output,
185+
};
182186
}
183187
}
184188

packages/core/src/ai-model/inspect.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,13 @@ export async function AiExtractElementInfo<
427427
},
428428
];
429429

430+
if (options.extractOption?.returnThought) {
431+
msgs.push({
432+
role: 'user',
433+
content: 'Please provide reasons.',
434+
});
435+
}
436+
430437
if (multimodalPrompt) {
431438
const addOns = await promptsToChatParam({
432439
images: multimodalPrompt.images,

packages/core/src/ai-model/prompt/extraction.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@ If a key specifies a JSON data type (such as Number, String, Boolean, Object, Ar
1111
1212
If the user provides multiple reference images, please carefully review the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.
1313
14+
If the user requests reasons to be provided, please provide the thought field in response, less then 100 words.
15+
1416
Return in the following JSON format:
1517
{
18+
thought: string, // the thought process of the extraction, less then 100 words, not required by default.
1619
data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.
1720
errors: [], // string[], error message if any
1821
}

packages/core/src/insight/index.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,11 @@ export default class Insight<
242242
dataDemand: InsightExtractParam,
243243
opt?: InsightExtractOption,
244244
multimodalPrompt?: TMultimodalPrompt,
245-
): Promise<any> {
245+
): Promise<{
246+
data: T;
247+
thought?: string;
248+
usage?: AIUsageInfo;
249+
}> {
246250
assert(
247251
typeof dataDemand === 'object' || typeof dataDemand === 'string',
248252
`dataDemand should be object or string, but get ${typeof dataDemand}`,
@@ -283,7 +287,7 @@ export default class Insight<
283287
error: errorLog,
284288
};
285289

286-
const { data } = parseResult || {};
290+
const { data, thought } = parseResult || {};
287291

288292
// 4
289293
emitInsightDump(
@@ -300,6 +304,7 @@ export default class Insight<
300304

301305
return {
302306
data,
307+
thought,
303308
usage,
304309
};
305310
}

packages/core/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ export type AIElementResponse =
8282
export interface AIDataExtractionResponse<DataDemand> {
8383
data: DataDemand;
8484
errors?: string[];
85+
thought?: string;
8586
}
8687

8788
export interface AISectionLocatorResponse {

packages/core/src/yaml.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export interface LocateOption {
99
export interface InsightExtractOption {
1010
domIncluded?: boolean | 'visible-only';
1111
screenshotIncluded?: boolean;
12+
returnThought?: boolean;
1213
}
1314

1415
export interface ReferenceImage {

packages/core/tests/unit-test/executor/index.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ describe(
103103
const dump = executor.dump();
104104
expect(dump.logTime).toBeTruthy();
105105

106-
expect(flushResult).toBe(flushResultData);
106+
expect(flushResult?.output).toBe(flushResultData);
107107
});
108108

109109
it('insight - init and append', async () => {
@@ -177,7 +177,7 @@ describe(
177177
expect(executor.status).toBe('error');
178178
expect(executor.latestErrorTask()).toBeTruthy();
179179
expect(executor.isInErrorState()).toBeTruthy();
180-
expect(r).toEqual('error-output');
180+
expect(r?.output).toEqual('error-output');
181181

182182
// expect to throw an error
183183
expect(async () => {

packages/core/tests/unit-test/prompt/__snapshots__/prompt.test.ts.snap

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,11 @@ If a key specifies a JSON data type (such as Number, String, Boolean, Object, Ar
3636
3737
If the user provides multiple reference images, please carefully review the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.
3838
39+
If the user requests reasons to be provided, please provide the thought field in response, less then 100 words.
40+
3941
Return in the following JSON format:
4042
{
43+
thought: string, // the thought process of the extraction, less then 100 words, not required by default.
4144
data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.
4245
errors: [], // string[], error message if any
4346
}

packages/web-integration/src/common/agent.ts

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -616,17 +616,25 @@ export class PageAgent<PageType extends WebPage = WebPage> {
616616
}
617617

618618
async aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt) {
619-
const { output, executor } = await this.taskExecutor.assert(assertion);
619+
const { output, executor, thought } = await this.taskExecutor.assert(
620+
assertion,
621+
{
622+
returnThought: true,
623+
},
624+
);
620625
await this.afterTaskRunning(executor, true);
621626

622-
if (output && opt?.keepRawResponse) {
623-
return output;
627+
if (opt?.keepRawResponse) {
628+
return {
629+
pass: output,
630+
thought,
631+
};
624632
}
625633

626-
if (!output?.pass) {
634+
if (!output) {
627635
const errMsg = msg || `Assertion failed: ${assertion}`;
628636
const reasonMsg = `Reason: ${
629-
output?.thought || executor.latestErrorTask()?.error || '(no_reason)'
637+
thought || executor.latestErrorTask()?.error || '(no_reason)'
630638
}`;
631639
throw new Error(`${errMsg}\n${reasonMsg}`);
632640
}

packages/web-integration/src/common/tasks.ts

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ import {
6565

6666
interface ExecutionResult<OutputType = any> {
6767
output: OutputType;
68+
thought?: string;
6869
executor: Executor;
6970
}
7071

@@ -1042,8 +1043,9 @@ export class PageTaskExecutor {
10421043
const { tasks } = await this.convertPlanToExecutable(plans, opts);
10431044
await taskExecutor.append(tasks);
10441045
const result = await taskExecutor.flush();
1046+
const { output } = result!;
10451047
return {
1046-
output: result,
1048+
output,
10471049
executor: taskExecutor,
10481050
};
10491051
}
@@ -1085,7 +1087,8 @@ export class PageTaskExecutor {
10851087

10861088
// plan
10871089
await taskExecutor.append(planningTask);
1088-
const planResult: PlanningAIResponse = await taskExecutor.flush();
1090+
const result = await taskExecutor.flush();
1091+
const planResult: PlanningAIResponse = result?.output;
10891092
if (taskExecutor.isInErrorState()) {
10901093
return {
10911094
output: planResult,
@@ -1167,13 +1170,19 @@ export class PageTaskExecutor {
11671170
const planningTask: ExecutionTaskPlanningApply =
11681171
this.planningTaskToGoal(userPrompt);
11691172
await taskExecutor.append(planningTask);
1170-
const output = await taskExecutor.flush();
1173+
const result = await taskExecutor.flush();
11711174
if (taskExecutor.isInErrorState()) {
11721175
return {
11731176
output: undefined,
11741177
executor: taskExecutor,
11751178
};
11761179
}
1180+
if (!result) {
1181+
throw new Error(
1182+
'result of taskExecutor.flush() is undefined in function actionToGoal',
1183+
);
1184+
}
1185+
const { output } = result;
11771186
const plans = output.actions;
11781187
yamlFlow.push(...(output.yamlFlow || []));
11791188
let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;
@@ -1211,7 +1220,7 @@ export class PageTaskExecutor {
12111220
}
12121221

12131222
private async createTypeQueryTask<T>(
1214-
type: 'Query' | 'Boolean' | 'Number' | 'String',
1223+
type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',
12151224
demand: InsightExtractParam,
12161225
opt?: InsightExtractOption,
12171226
multimodalPrompt?: TMultimodalPrompt,
@@ -1254,7 +1263,7 @@ export class PageTaskExecutor {
12541263
};
12551264
}
12561265

1257-
const { data, usage } = await this.insight.extract<any>(
1266+
const { data, usage, thought } = await this.insight.extract<any>(
12581267
demandInput,
12591268
opt,
12601269
multimodalPrompt,
@@ -1270,14 +1279,25 @@ export class PageTaskExecutor {
12701279
output: outputResult,
12711280
log: { dump: insightDump },
12721281
usage,
1282+
thought,
12731283
};
12741284
},
12751285
};
12761286

12771287
await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
1278-
const output = await taskExecutor.flush();
1288+
const result = await taskExecutor.flush();
1289+
1290+
if (!result) {
1291+
throw new Error(
1292+
'result of taskExecutor.flush() is undefined in function createTypeQueryTask',
1293+
);
1294+
}
1295+
1296+
const { output, thought } = result;
1297+
12791298
return {
12801299
output,
1300+
thought,
12811301
executor: taskExecutor,
12821302
};
12831303
}
@@ -1330,27 +1350,15 @@ export class PageTaskExecutor {
13301350

13311351
async assert(
13321352
assertion: TUserPrompt,
1333-
): Promise<ExecutionResult<InsightAssertionResponse>> {
1334-
const description = `assert: ${typeof assertion === 'string' ? assertion : assertion.prompt}`;
1335-
const taskExecutor = new Executor(taskTitleStr('Assert', description), {
1336-
onTaskStart: this.onTaskStartCallback,
1337-
});
1338-
const assertionPlan: PlanningAction<PlanningActionParamAssert> = {
1339-
type: 'Assert',
1340-
param: {
1341-
assertion,
1342-
},
1343-
locate: null,
1344-
};
1345-
const { tasks } = await this.convertPlanToExecutable([assertionPlan]);
1346-
1347-
await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
1348-
const output: InsightAssertionResponse = await taskExecutor.flush();
1349-
1350-
return {
1351-
output,
1352-
executor: taskExecutor,
1353-
};
1353+
opt?: InsightExtractOption,
1354+
): Promise<ExecutionResult<boolean>> {
1355+
const { textPrompt, multimodalPrompt } = parsePrompt(assertion);
1356+
return await this.createTypeQueryTask<boolean>(
1357+
'Assert',
1358+
textPrompt,
1359+
opt,
1360+
multimodalPrompt,
1361+
);
13541362
}
13551363

13561364
/**
@@ -1436,7 +1444,15 @@ export class PageTaskExecutor {
14361444
await taskExecutor.append(
14371445
this.prependExecutorWithScreenshot(assertTasks[0]),
14381446
);
1439-
const output: InsightAssertionResponse = await taskExecutor.flush();
1447+
const result = await taskExecutor.flush();
1448+
1449+
if (!result) {
1450+
throw new Error(
1451+
'result of taskExecutor.flush() is undefined in function waitFor',
1452+
);
1453+
}
1454+
1455+
const { output } = result as { output: InsightAssertionResponse };
14401456

14411457
if (output?.pass) {
14421458
return {

0 commit comments

Comments
 (0)