Skip to content

Commit a3c7aff

Browse files
committed
chore(core): update implementation for actions
1 parent 7a8f3b7 commit a3c7aff

File tree

12 files changed

+257
-384
lines changed

12 files changed

+257
-384
lines changed

packages/android/src/page/index.ts

Lines changed: 97 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import { repeat } from '@midscene/shared/utils';
1818
import {
1919
type AndroidDeviceInputOpt,
2020
type AndroidDevicePage,
21-
commonWebActions,
21+
commonWebActionsForWebPage,
2222
} from '@midscene/web';
2323
import { ADB } from 'appium-adb';
2424

@@ -35,51 +35,6 @@ export type AndroidDeviceOpt = {
3535
imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
3636
} & AndroidDeviceInputOpt;
3737

38-
const asyncNoop = async () => {};
39-
const androidActions: DeviceAction[] = [
40-
{
41-
name: 'AndroidBackButton',
42-
description: 'Trigger the system "back" operation on Android devices',
43-
location: false,
44-
call: asyncNoop,
45-
},
46-
{
47-
name: 'AndroidHomeButton',
48-
description: 'Trigger the system "home" operation on Android devices',
49-
location: false,
50-
call: asyncNoop,
51-
},
52-
{
53-
name: 'AndroidRecentAppsButton',
54-
description:
55-
'Trigger the system "recent apps" operation on Android devices',
56-
location: false,
57-
call: asyncNoop,
58-
},
59-
{
60-
name: 'AndroidLongPress',
61-
description:
62-
'Trigger a long press on the screen at specified coordinates on Android devices',
63-
paramSchema: '{ duration?: number }',
64-
paramDescription: 'The duration of the long press',
65-
location: 'optional',
66-
whatToLocate: 'The element to be long pressed',
67-
call: asyncNoop,
68-
},
69-
{
70-
name: 'AndroidPull',
71-
description:
72-
'Trigger pull down to refresh or pull up actions on Android devices',
73-
paramSchema:
74-
'{ direction: "up" | "down", distance?: number, duration?: number }',
75-
paramDescription:
76-
'The direction to pull, the distance to pull, and the duration of the pull.',
77-
location: 'optional',
78-
whatToLocate: 'The element to be pulled',
79-
call: asyncNoop,
80-
},
81-
];
82-
8338
export class AndroidDevice implements AndroidDevicePage {
8439
private deviceId: string;
8540
private yadbPushed = false;
@@ -92,7 +47,102 @@ export class AndroidDevice implements AndroidDevicePage {
9247
options?: AndroidDeviceOpt;
9348

9449
/**
 * Build the list of actions this Android device exposes.
 *
 * The result is the common web actions for this page, with `Input` re-bound
 * to the Android keyboard, followed by the Android-only actions (back, home,
 * recent apps, long press, pull).
 *
 * @returns A newly built array on every call.
 */
actionSpace(): DeviceAction[] {
  // Android replacement for the shared `Input` action: clear the located
  // field (if any) and then type through the device keyboard.
  const inputViaAndroidKeyboard: DeviceAction['call'] = async (
    context,
    param,
  ) => {
    const { element } = context;
    if (element) {
      await this.clearInput(element as unknown as ElementInfo);
    }

    // This guard must live outside the `element` branch: previously a call
    // without an element and without a param dereferenced `param.value`
    // on `undefined`. Nothing to type means nothing left to do.
    if (!param?.value) {
      return;
    }

    await this.keyboard.type(param.value, {
      autoDismissKeyboard: this.options?.autoDismissKeyboard,
    });
  };

  // Copy instead of mutating, so the objects handed out by
  // `commonWebActionsForWebPage` are never modified behind its back.
  const commonActions = commonWebActionsForWebPage(this).map(
    (action): DeviceAction =>
      action.name === 'Input'
        ? { ...action, call: inputViaAndroidKeyboard }
        : action,
  );

  const allActions: DeviceAction[] = [
    // Spread the patched list. Calling `commonWebActionsForWebPage(this)`
    // again here would silently drop the Android `Input` override.
    ...commonActions,
    {
      name: 'AndroidBackButton',
      description: 'Trigger the system "back" operation on Android devices',
      location: false,
      call: async () => {
        await this.back();
      },
    },
    {
      name: 'AndroidHomeButton',
      description: 'Trigger the system "home" operation on Android devices',
      location: false,
      call: async () => {
        await this.home();
      },
    },
    {
      name: 'AndroidRecentAppsButton',
      description:
        'Trigger the system "recent apps" operation on Android devices',
      location: false,
      call: async () => {
        await this.recentApps();
      },
    },
    {
      name: 'AndroidLongPress',
      description:
        'Trigger a long press on the screen at specified coordinates on Android devices',
      paramSchema: '{ duration?: number }',
      paramDescription: 'The duration of the long press in milliseconds',
      location: 'required',
      whatToLocate: 'The element to be long pressed',
      call: async (context, param) => {
        const { element } = context;
        if (!element) {
          throw new Error(
            'AndroidLongPress requires an element to be located',
          );
        }
        const [x, y] = element.center;
        // `duration` is optional and so is the whole param object.
        await this.longPress(x, y, param?.duration);
      },
    } as DeviceAction<{ duration?: number }>,
    {
      name: 'AndroidPull',
      description:
        'Trigger pull down to refresh or pull up actions on Android devices',
      paramSchema:
        '{ direction: "up" | "down", distance?: number, duration?: number }',
      paramDescription:
        'The direction to pull, the distance to pull (in pixels), and the duration of the pull (in milliseconds).',
      location: 'optional',
      whatToLocate: 'The element to be pulled',
      call: async (context, param) => {
        const { element } = context;
        // Start from the located element's center when there is one;
        // otherwise pass `undefined` and let pullDown/pullUp decide.
        const startPoint = element
          ? { left: element.center[0], top: element.center[1] }
          : undefined;
        // Read the direction defensively so a missing param reaches the
        // descriptive error below instead of a TypeError.
        const direction = param?.direction;
        if (direction === 'down') {
          await this.pullDown(startPoint, param.distance, param.duration);
        } else if (direction === 'up') {
          await this.pullUp(startPoint, param.distance, param.duration);
        } else {
          throw new Error(`Unknown pull direction: ${direction}`);
        }
      },
    } as DeviceAction<{
      direction: 'up' | 'down';
      distance?: number;
      duration?: number;
    }>,
  ];
  return allActions;
}
97147

98148
constructor(deviceId: string, options?: AndroidDeviceOpt) {

packages/core/src/ai-model/common.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ import type {
55
MidsceneYamlFlowItem,
66
PlanningAction,
77
PlanningActionParamInputOrKeyPress,
8-
PlanningActionParamScroll,
98
PlanningActionParamSleep,
109
Rect,
10+
ScrollParam,
1111
Size,
1212
} from '@/types';
1313
import { assert } from '@midscene/shared/utils';
@@ -355,7 +355,7 @@ export function buildYamlFlowFromPlans(
355355
locate,
356356
});
357357
} else if (type === 'Scroll') {
358-
const param = plan.param as PlanningActionParamScroll;
358+
const param = plan.param as ScrollParam;
359359
flow.push({
360360
aiScroll: null,
361361
locate,

packages/core/src/types.ts

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,7 @@ import type {
88
Size,
99
} from '@midscene/shared/types';
1010
import type { ChatCompletionMessageParam } from 'openai/resources';
11-
import type {
12-
DetailedLocateParam,
13-
MidsceneYamlFlowItem,
14-
scrollParam,
15-
} from './yaml';
11+
import type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';
1612

1713
export type {
1814
ElementTreeNode,
@@ -295,13 +291,12 @@ export interface PlanningAIResponse {
295291
export type PlanningActionParamTap = null;
296292
export type PlanningActionParamHover = null;
297293
export type PlanningActionParamRightClick = null;
294+
298295
export interface PlanningActionParamInputOrKeyPress {
299296
value: string;
300297
autoDismissKeyboard?: boolean;
301298
}
302299

303-
export type PlanningActionParamScroll = scrollParam;
304-
305300
export interface PlanningActionParamAssert {
306301
assertion: TUserPrompt;
307302
}
@@ -318,15 +313,12 @@ export type PlanningActionParamWaitFor = AgentWaitForOpt & {
318313
assertion: string;
319314
};
320315

321-
export interface PlanningActionParamAndroidLongPress {
322-
x: number;
323-
y: number;
316+
/**
 * Parameters of an Android long-press action.
 * Coordinates are not part of the params: the `AndroidLongPress` action
 * presses at the center of the located element.
 */
export interface AndroidLongPressParam {
324317
/** How long to hold the press; the action describes this value as milliseconds. */
duration?: number;
325318
}
326319

327-
export interface PlanningActionParamAndroidPull {
320+
/**
 * Parameters of an Android pull action (pull down to refresh / pull up).
 * No explicit start point is carried in the params: the `AndroidPull` action
 * derives it from the located element's center when an element is provided.
 */
export interface AndroidPullParam {
328321
/** Pull direction: 'down' maps to the device's pullDown, 'up' to pullUp. */
direction: 'up' | 'down';
329-
startPoint?: { x: number; y: number };
330322
/** Distance to pull; the action describes this value as pixels. */
distance?: number;
331323
/** Duration of the pull; the action describes this value as milliseconds. */
duration?: number;
332324
}
@@ -623,5 +615,5 @@ export interface DeviceAction<ParamType = any> {
623615
paramDescription?: string;
624616
location?: 'required' | 'optional' | false;
625617
whatToLocate?: string; // what to locate if location is required or optional
626-
call: (param: ParamType) => Promise<void> | void;
618+
call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;
627619
}

packages/core/src/yaml.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { PlanningActionParamScroll, Rect, TUserPrompt } from './types';
1+
import type { Rect, TUserPrompt } from './types';
22
import type { BaseElement, UIContext } from './types';
33

44
export interface LocateOption {
@@ -24,7 +24,7 @@ export interface DetailedLocateParam extends LocateOption {
2424
referenceImage?: ReferenceImage;
2525
}
2626

27-
export interface scrollParam {
27+
export interface ScrollParam {
2828
direction: 'down' | 'up' | 'right' | 'left';
2929
scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
3030
distance?: null | number; // distance in px
@@ -157,7 +157,7 @@ export interface MidsceneYamlFlowItemAIKeyboardPress extends LocateOption {
157157

158158
export interface MidsceneYamlFlowItemAIScroll
159159
extends LocateOption,
160-
PlanningActionParamScroll {
160+
ScrollParam {
161161
aiScroll: null;
162162
locate?: TUserPrompt; // which area to scroll, optional
163163
}

packages/web-integration/src/chrome-extension/page.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@
77

88
import type { WebKeyInput } from '@/common/page';
99
import { limitOpenNewTabScript } from '@/common/ui-utils';
10-
import { type AbstractPage, type MouseButton, commonWebActions } from '@/page';
10+
import {
11+
type AbstractPage,
12+
type MouseButton,
13+
commonWebActionsForWebPage,
14+
} from '@/page';
1115
import type {
1216
DeviceAction,
1317
ElementTreeNode,
@@ -56,7 +60,7 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
5660
}
5761

5862
/**
 * Action space of the Chrome-extension proxy page.
 * Returns exactly what `commonWebActionsForWebPage` builds for this page;
 * no extension-specific actions are added here.
 */
actionSpace(): DeviceAction[] {
59-
return commonWebActions;
63+
return commonWebActionsForWebPage(this);
6064
}
6165

6266
public async setActiveTabId(tabId: number) {

packages/web-integration/src/common/agent.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ import {
2020
type LocatorValidatorOption,
2121
type MidsceneYamlScript,
2222
type OnTaskStartTip,
23-
type PlanningActionParamScroll,
2423
type Rect,
24+
type ScrollParam,
2525
type TUserPrompt,
2626
} from '@midscene/core';
2727

@@ -411,7 +411,7 @@ export class PageAgent<PageType extends WebPage = WebPage> {
411411
}
412412

413413
async aiScroll(
414-
scrollParam: PlanningActionParamScroll,
414+
scrollParam: ScrollParam,
415415
locatePrompt?: TUserPrompt,
416416
opt?: LocateOption,
417417
) {

packages/web-integration/src/common/page.d.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ export interface AndroidDevicePage extends AbstractPage {
4949

5050
/** Keyboard-related input options accepted by an Android device. */
export type AndroidDeviceInputOpt = {
5151
/**
 * Passed through to the keyboard when typing text.
 * NOTE(review): presumably dismisses the soft keyboard after input — confirm against the keyboard implementation.
 */
autoDismissKeyboard?: boolean;
52-
hideKeyboardStrategy?: 'esc-first' | 'back-first';
52+
/**
 * Which key strategy is tried first when dismissing the keyboard.
 * NOTE(review): exact fallback order for 'esc-first' / 'back-first' is not visible here — confirm.
 */
keyboardDismissStrategy?: 'esc-first' | 'back-first';
5353
};
5454

5555
export type WebPage =

0 commit comments

Comments
 (0)