Skip to content

Commit 3f9aa2a

Browse files
authored
fix(web-integration): reorganize commonWebActionsForWebPage import and enhance utility functions (#1057)
1 parent fcb419f commit 3f9aa2a

File tree

4 files changed: 131 additions (+131) and 12 deletions (-12)

4 files changed: 131 additions (+131) and 12 deletions (-12)

packages/android/src/page/index.ts

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,9 @@ import type { ElementInfo } from '@midscene/shared/extractor';
1515
import { isValidPNGImageBuffer, resizeImg } from '@midscene/shared/img';
1616
import { getDebug } from '@midscene/shared/logger';
1717
import { repeat } from '@midscene/shared/utils';
18-
import {
19-
type AndroidDeviceInputOpt,
20-
type AndroidDevicePage,
21-
commonWebActionsForWebPage,
22-
} from '@midscene/web';
18+
import type { AndroidDeviceInputOpt, AndroidDevicePage } from '@midscene/web';
19+
import { commonWebActionsForWebPage } from '@midscene/web/utils';
20+
2321
import { ADB } from 'appium-adb';
2422

2523
// only for Android, because it's impossible to scroll to the bottom, so we need to set a default scroll times

packages/web-integration/src/chrome-extension/page.ts

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7,11 +7,8 @@
77

88
import type { WebKeyInput } from '@/common/page';
99
import { limitOpenNewTabScript } from '@/common/ui-utils';
10-
import {
11-
type AbstractPage,
12-
type MouseButton,
13-
commonWebActionsForWebPage,
14-
} from '@/page';
10+
import { commonWebActionsForWebPage } from '@/common/utils';
11+
import type { AbstractPage, MouseButton } from '@/page';
1512
import type {
1613
DeviceAction,
1714
ElementTreeNode,

packages/web-integration/src/common/utils.ts

Lines changed: 126 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,19 @@
11
import type { StaticPage } from '@/playground';
22
import type {
33
BaseElement,
4+
DeviceAction,
45
ElementTreeNode,
56
ExecutionDump,
67
ExecutionTask,
78
PlanningLocateParam,
89
PlaywrightParserOpt,
10+
ScrollParam,
911
TMultimodalPrompt,
1012
TUserPrompt,
1113
UIContext,
1214
} from '@midscene/core';
1315
import { elementByPositionWithElementInfo } from '@midscene/core/ai-model';
14-
import { uploadTestInfoToServer } from '@midscene/core/utils';
16+
import { sleep, uploadTestInfoToServer } from '@midscene/core/utils';
1517
import { MIDSCENE_REPORT_TAG_NAME, getAIConfig } from '@midscene/shared/env';
1618
import type { ElementInfo } from '@midscene/shared/extractor';
1719
import {
@@ -25,10 +27,12 @@ import { assert, logMsg, uuid } from '@midscene/shared/utils';
2527
import dayjs from 'dayjs';
2628
import type { Page as PlaywrightPage } from 'playwright';
2729
import type { Page as PuppeteerPage } from 'puppeteer';
30+
import type { AbstractPage } from '../page';
2831
import { WebElementInfo, type WebUIContext } from '../web-element';
2932
import type { WebPage } from './page';
3033
import { debug as cacheDebug } from './task-cache';
3134
import type { PageTaskExecutor } from './tasks';
35+
import { getKeyCommands } from './ui-utils';
3236

3337
const debug = getDebug('tool:profile');
3438

@@ -343,3 +347,124 @@ export const parsePrompt = (
343347
: undefined,
344348
};
345349
};
350+
351+
export const commonWebActionsForWebPage = <T extends AbstractPage>(
352+
page: T,
353+
): DeviceAction[] => [
354+
{
355+
name: 'Tap',
356+
description: 'Tap the element',
357+
location: 'required',
358+
call: async (context) => {
359+
const { element } = context;
360+
assert(element, 'Element not found, cannot tap');
361+
await page.mouse.click(element.center[0], element.center[1], {
362+
button: 'left',
363+
});
364+
},
365+
},
366+
{
367+
name: 'RightClick',
368+
description: 'Right click the element',
369+
location: 'required',
370+
call: async (context) => {
371+
const { element } = context;
372+
assert(element, 'Element not found, cannot right click');
373+
await page.mouse.click(element.center[0], element.center[1], {
374+
button: 'right',
375+
});
376+
},
377+
},
378+
{
379+
name: 'Hover',
380+
description: 'Move the mouse to the element',
381+
location: 'required',
382+
call: async (context) => {
383+
const { element } = context;
384+
assert(element, 'Element not found, cannot hover');
385+
await page.mouse.move(element.center[0], element.center[1]);
386+
},
387+
},
388+
{
389+
name: 'Input',
390+
description: 'Replace the input field with a new value',
391+
paramSchema: '{ value: string }',
392+
paramDescription:
393+
'`value` is the final that should be filled in the input box. No matter what modifications are required, just provide the final value to replace the existing input value. Giving a blank string means clear the input field.',
394+
location: 'required',
395+
whatToLocate: 'The input field to be filled',
396+
call: async (context, param) => {
397+
const { element } = context;
398+
if (element) {
399+
await page.clearInput(element as unknown as ElementInfo);
400+
401+
if (!param || !param.value) {
402+
return;
403+
}
404+
}
405+
406+
// Note: there is another implementation in AndroidDevicePage, which is more complex
407+
await page.keyboard.type(param.value);
408+
},
409+
} as DeviceAction<{ value: string }>,
410+
{
411+
name: 'KeyboardPress',
412+
description: 'Press a key',
413+
paramSchema: '{ value: string }',
414+
paramDescription: 'The key to be pressed',
415+
location: false,
416+
call: async (context, param) => {
417+
const keys = getKeyCommands(param.value);
418+
await page.keyboard.press(keys as any); // TODO: fix this type error
419+
},
420+
} as DeviceAction<{ value: string }>,
421+
{
422+
name: 'Scroll',
423+
description: 'Scroll the page or an element',
424+
paramSchema:
425+
'{ direction: "down"(default) | "up" | "right" | "left", scrollType: "once" (default) | "untilBottom" | "untilTop" | "untilRight" | "untilLeft", distance: number | null }',
426+
paramDescription:
427+
'The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',
428+
location: 'optional',
429+
whatToLocate: 'The element to be scrolled',
430+
call: async (context, param) => {
431+
const { element } = context;
432+
const startingPoint = element
433+
? {
434+
left: element.center[0],
435+
top: element.center[1],
436+
}
437+
: undefined;
438+
const scrollToEventName = param?.scrollType;
439+
if (scrollToEventName === 'untilTop') {
440+
await page.scrollUntilTop(startingPoint);
441+
} else if (scrollToEventName === 'untilBottom') {
442+
await page.scrollUntilBottom(startingPoint);
443+
} else if (scrollToEventName === 'untilRight') {
444+
await page.scrollUntilRight(startingPoint);
445+
} else if (scrollToEventName === 'untilLeft') {
446+
await page.scrollUntilLeft(startingPoint);
447+
} else if (scrollToEventName === 'once' || !scrollToEventName) {
448+
if (param?.direction === 'down' || !param || !param.direction) {
449+
await page.scrollDown(param?.distance || undefined, startingPoint);
450+
} else if (param.direction === 'up') {
451+
await page.scrollUp(param.distance || undefined, startingPoint);
452+
} else if (param.direction === 'left') {
453+
await page.scrollLeft(param.distance || undefined, startingPoint);
454+
} else if (param.direction === 'right') {
455+
await page.scrollRight(param.distance || undefined, startingPoint);
456+
} else {
457+
throw new Error(`Unknown scroll direction: ${param.direction}`);
458+
}
459+
// until mouse event is done
460+
await sleep(500);
461+
} else {
462+
throw new Error(
463+
`Unknown scroll event type: ${scrollToEventName}, param: ${JSON.stringify(
464+
param,
465+
)}`,
466+
);
467+
}
468+
},
469+
} as DeviceAction<ScrollParam>,
470+
];

packages/web-integration/src/index.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ export type {
66
AndroidDeviceInputOpt,
77
} from './common/page';
88
export type { AbstractPage } from './page';
9-
export { commonWebActionsForWebPage } from './page';
109
export type { WebUIContext } from './web-element';
1110

1211
export { PageAgent, type PageAgentOpt } from './common/agent';

0 commit comments

Comments
 (0)