Skip to content

Commit 7f4437a

Browse files
yuyutaotaoquanru
authored and committed
chore(core): update implementation for actions
1 parent 82592e7 commit 7f4437a

File tree

11 files changed

+256
-383
lines changed

11 files changed

+256
-383
lines changed

packages/android/src/page/index.ts

Lines changed: 97 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import { repeat } from '@midscene/shared/utils';
1919
import {
2020
type AndroidDeviceInputOpt,
2121
type AndroidDevicePage,
22-
commonWebActions,
22+
commonWebActionsForWebPage,
2323
} from '@midscene/web';
2424
import { ADB } from 'appium-adb';
2525

@@ -36,51 +36,6 @@ export type AndroidDeviceOpt = {
3636
imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
3737
} & AndroidDeviceInputOpt;
3838

39-
const asyncNoop = async () => {};
40-
const androidActions: DeviceAction[] = [
41-
{
42-
name: 'AndroidBackButton',
43-
description: 'Trigger the system "back" operation on Android devices',
44-
location: false,
45-
call: asyncNoop,
46-
},
47-
{
48-
name: 'AndroidHomeButton',
49-
description: 'Trigger the system "home" operation on Android devices',
50-
location: false,
51-
call: asyncNoop,
52-
},
53-
{
54-
name: 'AndroidRecentAppsButton',
55-
description:
56-
'Trigger the system "recent apps" operation on Android devices',
57-
location: false,
58-
call: asyncNoop,
59-
},
60-
{
61-
name: 'AndroidLongPress',
62-
description:
63-
'Trigger a long press on the screen at specified coordinates on Android devices',
64-
paramSchema: '{ duration?: number }',
65-
paramDescription: 'The duration of the long press',
66-
location: 'optional',
67-
whatToLocate: 'The element to be long pressed',
68-
call: asyncNoop,
69-
},
70-
{
71-
name: 'AndroidPull',
72-
description:
73-
'Trigger pull down to refresh or pull up actions on Android devices',
74-
paramSchema:
75-
'{ direction: "up" | "down", distance?: number, duration?: number }',
76-
paramDescription:
77-
'The direction to pull, the distance to pull, and the duration of the pull.',
78-
location: 'optional',
79-
whatToLocate: 'The element to be pulled',
80-
call: asyncNoop,
81-
},
82-
];
83-
8439
export class AndroidDevice implements AndroidDevicePage {
8540
private deviceId: string;
8641
private yadbPushed = false;
@@ -93,7 +48,102 @@ export class AndroidDevice implements AndroidDevicePage {
9348
options?: AndroidDeviceOpt;
9449

9550
/**
 * Builds the full list of actions this Android device can execute.
 *
 * The list is the common web-page actions (with `Input` replaced by an
 * Android-specific implementation) followed by the Android-only actions:
 * back / home / recent-apps buttons, long press and pull.
 *
 * @returns A new array of actions bound to this device instance.
 */
actionSpace(): DeviceAction[] {
  // Android-specific text input: clear the located field first, then type
  // through the device keyboard so `autoDismissKeyboard` is honoured.
  const androidInput: DeviceAction['call'] = async (context, param) => {
    const { element } = context;
    if (element) {
      await this.clearInput(element as unknown as ElementInfo);
    }

    // Nothing to type. This guard must run whether or not an element was
    // located, otherwise `param.value` is dereferenced on an undefined param.
    if (!param?.value) {
      return;
    }

    await this.keyboard.type(param.value, {
      autoDismissKeyboard: this.options?.autoDismissKeyboard,
    });
  };

  // Build the patched common actions once and reuse that exact list below.
  // Overridden entries are copied rather than mutated so the objects handed
  // out by `commonWebActionsForWebPage` are left untouched.
  const commonActions = commonWebActionsForWebPage(this).map(
    (action): DeviceAction =>
      action.name === 'Input' ? { ...action, call: androidInput } : action,
  );

  const allActions: DeviceAction[] = [
    // Spread the patched list; calling the factory again here would silently
    // drop the Android `Input` override.
    ...commonActions,
    {
      name: 'AndroidBackButton',
      description: 'Trigger the system "back" operation on Android devices',
      location: false,
      call: async () => {
        await this.back();
      },
    },
    {
      name: 'AndroidHomeButton',
      description: 'Trigger the system "home" operation on Android devices',
      location: false,
      call: async () => {
        await this.home();
      },
    },
    {
      name: 'AndroidRecentAppsButton',
      description:
        'Trigger the system "recent apps" operation on Android devices',
      location: false,
      call: async () => {
        await this.recentApps();
      },
    },
    {
      name: 'AndroidLongPress',
      description:
        'Trigger a long press on the screen at specified coordinates on Android devices',
      paramSchema: '{ duration?: number }',
      paramDescription: 'The duration of the long press in milliseconds',
      location: 'required',
      whatToLocate: 'The element to be long pressed',
      call: async (context, param) => {
        const { element } = context;
        if (!element) {
          throw new Error(
            'AndroidLongPress requires an element to be located',
          );
        }
        const [x, y] = element.center;
        // Every field of the param is optional, so the param itself may be
        // omitted; fall back to the device's default duration in that case.
        await this.longPress(x, y, param?.duration);
      },
    } as DeviceAction<{ duration?: number }>,
    {
      name: 'AndroidPull',
      description:
        'Trigger pull down to refresh or pull up actions on Android devices',
      paramSchema:
        '{ direction: "up" | "down", distance?: number, duration?: number }',
      paramDescription:
        'The direction to pull, the distance to pull (in pixels), and the duration of the pull (in milliseconds).',
      location: 'optional',
      whatToLocate: 'The element to be pulled',
      call: async (context, param) => {
        const { element } = context;
        // Start from the located element's center when there is one;
        // otherwise let the device choose its default start point.
        const startPoint = element
          ? { left: element.center[0], top: element.center[1] }
          : undefined;
        if (param.direction === 'down') {
          await this.pullDown(startPoint, param.distance, param.duration);
        } else if (param.direction === 'up') {
          await this.pullUp(startPoint, param.distance, param.duration);
        } else {
          throw new Error(`Unknown pull direction: ${param.direction}`);
        }
      },
    } as DeviceAction<{
      direction: 'up' | 'down';
      distance?: number;
      duration?: number;
    }>,
  ];
  return allActions;
}
98148

99149
constructor(deviceId: string, options?: AndroidDeviceOpt) {

packages/core/src/ai-model/common.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ import type {
55
MidsceneYamlFlowItem,
66
PlanningAction,
77
PlanningActionParamInputOrKeyPress,
8-
PlanningActionParamScroll,
98
PlanningActionParamSleep,
109
Rect,
10+
ScrollParam,
1111
Size,
1212
} from '@/types';
1313
import { assert } from '@midscene/shared/utils';
@@ -356,7 +356,7 @@ export function buildYamlFlowFromPlans(
356356
locate,
357357
});
358358
} else if (type === 'Scroll') {
359-
const param = plan.param as PlanningActionParamScroll;
359+
const param = plan.param as ScrollParam;
360360
flow.push({
361361
aiScroll: null,
362362
locate,

packages/core/src/types.ts

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,7 @@ import type {
88
Size,
99
} from '@midscene/shared/types';
1010
import type { ChatCompletionMessageParam } from 'openai/resources/index';
11-
import type {
12-
DetailedLocateParam,
13-
MidsceneYamlFlowItem,
14-
scrollParam,
15-
} from './yaml';
11+
import type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';
1612

1713
export type {
1814
ElementTreeNode,
@@ -295,13 +291,12 @@ export interface PlanningAIResponse {
295291
export type PlanningActionParamTap = null;
296292
export type PlanningActionParamHover = null;
297293
export type PlanningActionParamRightClick = null;
294+
298295
export interface PlanningActionParamInputOrKeyPress {
299296
value: string;
300297
autoDismissKeyboard?: boolean;
301298
}
302299

303-
export type PlanningActionParamScroll = scrollParam;
304-
305300
export interface PlanningActionParamAssert {
306301
assertion: TUserPrompt;
307302
}
@@ -318,15 +313,12 @@ export type PlanningActionParamWaitFor = AgentWaitForOpt & {
318313
assertion: string;
319314
};
320315

321-
export interface PlanningActionParamAndroidLongPress {
322-
x: number;
323-
y: number;
316+
export interface AndroidLongPressParam {
324317
duration?: number;
325318
}
326319

327-
export interface PlanningActionParamAndroidPull {
320+
export interface AndroidPullParam {
328321
direction: 'up' | 'down';
329-
startPoint?: { x: number; y: number };
330322
distance?: number;
331323
duration?: number;
332324
}
@@ -623,5 +615,5 @@ export interface DeviceAction<ParamType = any> {
623615
paramDescription?: string;
624616
location?: 'required' | 'optional' | false;
625617
whatToLocate?: string; // what to locate if location is required or optional
626-
call: (param: ParamType) => Promise<void> | void;
618+
call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;
627619
}

packages/core/src/yaml.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { PlanningActionParamScroll, Rect, TUserPrompt } from './types';
1+
import type { Rect, TUserPrompt } from './types';
22
import type { BaseElement, UIContext } from './types';
33

44
export interface LocateOption {
@@ -24,7 +24,7 @@ export interface DetailedLocateParam extends LocateOption {
2424
referenceImage?: ReferenceImage;
2525
}
2626

27-
export interface scrollParam {
27+
export interface ScrollParam {
2828
direction: 'down' | 'up' | 'right' | 'left';
2929
scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
3030
distance?: null | number; // distance in px
@@ -157,7 +157,7 @@ export interface MidsceneYamlFlowItemAIKeyboardPress extends LocateOption {
157157

158158
export interface MidsceneYamlFlowItemAIScroll
159159
extends LocateOption,
160-
PlanningActionParamScroll {
160+
ScrollParam {
161161
aiScroll: null;
162162
locate?: TUserPrompt; // which area to scroll, optional
163163
}

packages/web-integration/src/chrome-extension/page.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@
77

88
import type { WebKeyInput } from '@/common/page';
99
import { limitOpenNewTabScript } from '@/common/ui-utils';
10-
import { type AbstractPage, type MouseButton, commonWebActions } from '@/page';
10+
import {
11+
type AbstractPage,
12+
type MouseButton,
13+
commonWebActionsForWebPage,
14+
} from '@/page';
1115
import type {
1216
DeviceAction,
1317
ElementTreeNode,
@@ -56,7 +60,7 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
5660
}
5761

5862
actionSpace(): DeviceAction[] {
59-
return commonWebActions;
63+
return commonWebActionsForWebPage(this);
6064
}
6165

6266
public async setActiveTabId(tabId: number) {

packages/web-integration/src/common/agent.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ import {
2020
type LocatorValidatorOption,
2121
type MidsceneYamlScript,
2222
type OnTaskStartTip,
23-
type PlanningActionParamScroll,
2423
type Rect,
24+
type ScrollParam,
2525
type TUserPrompt,
2626
} from '@midscene/core';
2727

@@ -411,7 +411,7 @@ export class PageAgent<PageType extends WebPage = WebPage> {
411411
}
412412

413413
async aiScroll(
414-
scrollParam: PlanningActionParamScroll,
414+
scrollParam: ScrollParam,
415415
locatePrompt?: TUserPrompt,
416416
opt?: LocateOption,
417417
) {

0 commit comments

Comments
 (0)