Skip to content

Commit 0ec93a3

Browse files
authored
feat(web-integration): add executeAction method to various classes and update action handling logic (#1059)
* feat(web-integration): add executeAction method to various classes and update action handling logic * feat(web-integration): refactor action handling by introducing executeActionForPage utility across multiple classes
1 parent 3336332 commit 0ec93a3

File tree

11 files changed

+115
-147
lines changed

11 files changed

+115
-147
lines changed

packages/android/src/page/index.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import fs from 'node:fs';
44
import { createRequire } from 'node:module';
55
import path from 'node:path';
66
import { type Point, type Size, getAIConfig } from '@midscene/core';
7-
import type { DeviceAction, PageType } from '@midscene/core';
7+
import type { DeviceAction, ExecutorContext, PageType } from '@midscene/core';
88
import { getTmpFile, sleep } from '@midscene/core/utils';
99
import {
1010
MIDSCENE_ADB_PATH,
@@ -17,7 +17,10 @@ import { isValidPNGImageBuffer, resizeImg } from '@midscene/shared/img';
1717
import { getDebug } from '@midscene/shared/logger';
1818
import { repeat } from '@midscene/shared/utils';
1919
import type { AndroidDeviceInputOpt, AndroidDevicePage } from '@midscene/web';
20-
import { commonWebActionsForWebPage } from '@midscene/web/utils';
20+
import {
21+
commonWebActionsForWebPage,
22+
executeActionForPage,
23+
} from '@midscene/web/utils';
2124

2225
import { ADB } from 'appium-adb';
2326

@@ -147,6 +150,14 @@ export class AndroidDevice implements AndroidDevicePage {
147150
return allActions;
148151
}
149152

153+
async executeAction<T = unknown>(
154+
actionName: string,
155+
context: ExecutorContext,
156+
param: T,
157+
): Promise<void> {
158+
return executeActionForPage(this, actionName, context, param);
159+
}
160+
150161
constructor(deviceId: string, options?: AndroidDeviceOpt) {
151162
assert(deviceId, 'deviceId is required for AndroidDevice');
152163

packages/web-integration/src/bridge-mode/agent-cli-side.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { PageAgent, type PageAgentOpt } from '@/common/agent';
22
import type { KeyboardAction, MouseAction } from '@/page';
3+
import type { ExecutorContext } from '@midscene/core';
34
import { assert } from '@midscene/shared/utils';
45
import {
56
type BridgeConnectTabOptions,
@@ -14,6 +15,11 @@ import type { ExtensionBridgePageBrowserSide } from './page-browser-side';
1415

1516
interface ChromeExtensionPageCliSide extends ExtensionBridgePageBrowserSide {
1617
showStatusMessage: (message: string) => Promise<void>;
18+
executeAction: <T = unknown>(
19+
actionName: string,
20+
context: ExecutorContext,
21+
param: T,
22+
) => Promise<void>;
1723
}
1824

1925
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
@@ -42,6 +48,17 @@ export const getBridgePageInCliSide = (
4248
showStatusMessage: async (message: string) => {
4349
await server.call(BridgeEvent.UpdateAgentStatus, [message]);
4450
},
51+
executeAction: async <T = unknown>(
52+
actionName: string,
53+
context: ExecutorContext,
54+
param: T,
55+
): Promise<void> => {
56+
return await server.call(BridgeEvent.ExecuteAction, [
57+
actionName,
58+
context,
59+
param,
60+
]);
61+
},
4562
};
4663

4764
return new Proxy(page, {

packages/web-integration/src/bridge-mode/common.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export enum BridgeEvent {
1414
GetBrowserTabList = 'getBrowserTabList',
1515
SetDestroyOptions = 'setDestroyOptions',
1616
SetActiveTabId = 'setActiveTabId',
17+
ExecuteAction = 'executeAction',
1718
}
1819

1920
export const BridgeSignalKill = 'MIDSCENE_BRIDGE_SIGNAL_KILL';

packages/web-integration/src/bridge-mode/page-browser-side.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ export class ExtensionBridgePageBrowserSide extends ChromeExtensionProxyPage {
6262
return this.onLogMessage(args[0] as string, 'status');
6363
}
6464

65+
if (method === BridgeEvent.ExecuteAction) {
66+
const [actionName, context, param] = args;
67+
const actionSpace = this.actionSpace();
68+
const action = actionSpace.find((a) => a.name === actionName);
69+
if (!action) {
70+
throw new Error(`Action ${actionName} not found in action space`);
71+
}
72+
return action.call(context, param);
73+
}
74+
6575
const tabId = await this.getActiveTabId();
6676
if (!tabId || tabId === 0) {
6777
throw new Error('no tab is connected');

packages/web-integration/src/chrome-extension/page.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@
77

88
import type { WebKeyInput } from '@/common/page';
99
import { limitOpenNewTabScript } from '@/common/ui-utils';
10-
import { commonWebActionsForWebPage } from '@/common/utils';
10+
import {
11+
commonWebActionsForWebPage,
12+
executeActionForPage,
13+
} from '@/common/utils';
1114
import type { AbstractPage, MouseButton } from '@/page';
1215
import type {
1316
DeviceAction,
1417
ElementTreeNode,
18+
ExecutorContext,
1519
Point,
1620
Size,
1721
} from '@midscene/core';
@@ -60,6 +64,14 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
6064
return commonWebActionsForWebPage(this);
6165
}
6266

67+
async executeAction<T = unknown>(
68+
actionName: string,
69+
context: ExecutorContext,
70+
param: T,
71+
): Promise<void> {
72+
return executeActionForPage(this, actionName, context, param);
73+
}
74+
6375
public async setActiveTabId(tabId: number) {
6476
if (this.activeTabId) {
6577
throw new Error(

packages/web-integration/src/common/tasks.ts

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -203,15 +203,15 @@ export class PageTaskExecutor {
203203
},
204204
) {
205205
const tasks: ExecutionTaskApply[] = [];
206-
plans.forEach((plan) => {
206+
for (const plan of plans) {
207207
if (plan.type === 'Locate') {
208208
if (
209209
plan.locate === null ||
210210
plan.locate?.id === null ||
211211
plan.locate?.id === 'null'
212212
) {
213213
// console.warn('Locate action with id is null, will be ignored');
214-
return;
214+
continue;
215215
}
216216
const taskFind: ExecutionTaskInsightLocateApply = {
217217
type: 'Insight',
@@ -495,13 +495,7 @@ export class PageTaskExecutor {
495495
};
496496
tasks.push(taskActionDrag);
497497
} else {
498-
const actionSpace = this.page.actionSpace();
499498
const planType = plan.type;
500-
const action = actionSpace.find((action) => action.name === planType);
501-
if (!action) {
502-
throw new Error(`Action ${planType} not found in action space`);
503-
}
504-
505499
const task: ExecutionTaskActionApply = {
506500
type: 'Action',
507501
subType: planType,
@@ -510,17 +504,16 @@ export class PageTaskExecutor {
510504
executor: async (param, context) => {
511505
debug(
512506
'executing action',
513-
action.name,
507+
planType,
514508
param,
515509
`context.element.center: ${context.element?.center}`,
516510
);
517-
return action.call(context, param);
511+
return await this.page.executeAction(planType, context, param);
518512
},
519513
};
520-
assert(task, `Unknown or unsupported task type: ${planType}`);
521514
tasks.push(task);
522515
}
523-
});
516+
}
524517

525518
const wrappedTasks = tasks.map(
526519
(task: ExecutionTaskApply, index: number) => {

packages/web-integration/src/common/utils.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import type {
55
ElementTreeNode,
66
ExecutionDump,
77
ExecutionTask,
8+
ExecutorContext,
89
PlanningLocateParam,
910
PlaywrightParserOpt,
1011
ScrollParam,
@@ -348,6 +349,22 @@ export const parsePrompt = (
348349
};
349350
};
350351

352+
export const executeActionForPage = async <T extends AbstractPage, P = unknown>(
353+
page: T,
354+
actionName: string,
355+
context: ExecutorContext,
356+
param: P,
357+
): Promise<void> => {
358+
const actions = await page.actionSpace();
359+
const action = actions.find((a) => a.name === actionName);
360+
361+
if (!action) {
362+
throw new Error(`Action ${actionName} not found in action space`);
363+
}
364+
365+
return action.call(context, param);
366+
};
367+
351368
export const commonWebActionsForWebPage = <T extends AbstractPage>(
352369
page: T,
353370
): DeviceAction[] => [

packages/web-integration/src/page.ts

Lines changed: 13 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
import type { DeviceAction, Point, ScrollParam, Size } from '@midscene/core';
1+
import type {
2+
DeviceAction,
3+
ExecutorContext,
4+
Point,
5+
ScrollParam,
6+
Size,
7+
} from '@midscene/core';
28
import { sleep } from '@midscene/core/utils';
39
import type { ElementInfo, ElementNode } from '@midscene/shared/extractor';
410
import { assert } from '@midscene/shared/utils';
@@ -43,7 +49,12 @@ export abstract class AbstractPage {
4349
abstract url(): string | Promise<string>;
4450
abstract screenshotBase64?(): Promise<string>;
4551
abstract size(): Promise<Size>;
46-
abstract actionSpace(): DeviceAction[];
52+
abstract actionSpace(): DeviceAction[] | Promise<DeviceAction[]>;
53+
abstract executeAction<T = unknown>(
54+
actionName: string,
55+
context: ExecutorContext,
56+
param: T,
57+
): Promise<void>;
4758

4859
get mouse(): MouseAction {
4960
return {
@@ -94,124 +105,3 @@ export abstract class AbstractPage {
94105

95106
abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;
96107
}
97-
98-
export const commonWebActionsForWebPage = <T extends AbstractPage>(
99-
page: T,
100-
): DeviceAction[] => [
101-
{
102-
name: 'Tap',
103-
description: 'Tap the element',
104-
location: 'required',
105-
call: async (context) => {
106-
const { element } = context;
107-
assert(element, 'Element not found, cannot tap');
108-
await page.mouse.click(element.center[0], element.center[1], {
109-
button: 'left',
110-
});
111-
},
112-
},
113-
{
114-
name: 'RightClick',
115-
description: 'Right click the element',
116-
location: 'required',
117-
call: async (context) => {
118-
const { element } = context;
119-
assert(element, 'Element not found, cannot right click');
120-
await page.mouse.click(element.center[0], element.center[1], {
121-
button: 'right',
122-
});
123-
},
124-
},
125-
{
126-
name: 'Hover',
127-
description: 'Move the mouse to the element',
128-
location: 'required',
129-
call: async (context) => {
130-
const { element } = context;
131-
assert(element, 'Element not found, cannot hover');
132-
await page.mouse.move(element.center[0], element.center[1]);
133-
},
134-
},
135-
{
136-
name: 'Input',
137-
description: 'Replace the input field with a new value',
138-
paramSchema: '{ value: string }',
139-
paramDescription:
140-
'`value` is the final that should be filled in the input box. No matter what modifications are required, just provide the final value to replace the existing input value. Giving a blank string means clear the input field.',
141-
location: 'required',
142-
whatToLocate: 'The input field to be filled',
143-
call: async (context, param) => {
144-
const { element } = context;
145-
if (element) {
146-
await page.clearInput(element as unknown as ElementInfo);
147-
148-
if (!param || !param.value) {
149-
return;
150-
}
151-
}
152-
153-
// Note: there is another implementation in AndroidDevicePage, which is more complex
154-
await page.keyboard.type(param.value);
155-
},
156-
} as DeviceAction<{ value: string }>,
157-
{
158-
name: 'KeyboardPress',
159-
description: 'Press a key',
160-
paramSchema: '{ value: string }',
161-
paramDescription: 'The key to be pressed',
162-
location: false,
163-
call: async (context, param) => {
164-
const keys = getKeyCommands(param.value);
165-
await page.keyboard.press(keys as any); // TODO: fix this type error
166-
},
167-
} as DeviceAction<{ value: string }>,
168-
{
169-
name: 'Scroll',
170-
description: 'Scroll the page or an element',
171-
paramSchema:
172-
'{ direction: "down"(default) | "up" | "right" | "left", scrollType: "once" (default) | "untilBottom" | "untilTop" | "untilRight" | "untilLeft", distance: number | null }',
173-
paramDescription:
174-
'The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',
175-
location: 'optional',
176-
whatToLocate: 'The element to be scrolled',
177-
call: async (context, param) => {
178-
const { element } = context;
179-
const startingPoint = element
180-
? {
181-
left: element.center[0],
182-
top: element.center[1],
183-
}
184-
: undefined;
185-
const scrollToEventName = param?.scrollType;
186-
if (scrollToEventName === 'untilTop') {
187-
await page.scrollUntilTop(startingPoint);
188-
} else if (scrollToEventName === 'untilBottom') {
189-
await page.scrollUntilBottom(startingPoint);
190-
} else if (scrollToEventName === 'untilRight') {
191-
await page.scrollUntilRight(startingPoint);
192-
} else if (scrollToEventName === 'untilLeft') {
193-
await page.scrollUntilLeft(startingPoint);
194-
} else if (scrollToEventName === 'once' || !scrollToEventName) {
195-
if (param?.direction === 'down' || !param || !param.direction) {
196-
await page.scrollDown(param?.distance || undefined, startingPoint);
197-
} else if (param.direction === 'up') {
198-
await page.scrollUp(param.distance || undefined, startingPoint);
199-
} else if (param.direction === 'left') {
200-
await page.scrollLeft(param.distance || undefined, startingPoint);
201-
} else if (param.direction === 'right') {
202-
await page.scrollRight(param.distance || undefined, startingPoint);
203-
} else {
204-
throw new Error(`Unknown scroll direction: ${param.direction}`);
205-
}
206-
// until mouse event is done
207-
await sleep(500);
208-
} else {
209-
throw new Error(
210-
`Unknown scroll event type: ${scrollToEventName}, param: ${JSON.stringify(
211-
param,
212-
)}`,
213-
);
214-
}
215-
},
216-
} as DeviceAction<ScrollParam>,
217-
];

packages/web-integration/src/playground/static-page.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import { ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED } from '@/common/utils';
1+
import { ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED, executeActionForPage } from '@/common/utils';
22
import type { AbstractPage } from '@/page';
3-
import type { DeviceAction, Point } from '@midscene/core';
3+
import type { DeviceAction, ExecutorContext, Point } from '@midscene/core';
44
import type { WebUIContext } from '../web-element';
55

66
const ThrowNotImplemented: any = (methodName: string) => {
@@ -32,6 +32,14 @@ export default class StaticPage implements AbstractPage {
3232
return [];
3333
}
3434

35+
async executeAction<T = unknown>(
36+
actionName: string,
37+
context: ExecutorContext,
38+
param: T,
39+
): Promise<void> {
40+
return executeActionForPage(this, actionName, context, param);
41+
}
42+
3543
async evaluateJavaScript<T = any>(script: string): Promise<T> {
3644
return ThrowNotImplemented('evaluateJavaScript');
3745
}

0 commit comments

Comments
 (0)