1
1
import type { StaticPage } from '@/playground' ;
2
2
import type {
3
3
BaseElement ,
4
+ DeviceAction ,
4
5
ElementTreeNode ,
5
6
ExecutionDump ,
6
7
ExecutionTask ,
7
8
PlanningLocateParam ,
8
9
PlaywrightParserOpt ,
10
+ ScrollParam ,
9
11
TMultimodalPrompt ,
10
12
TUserPrompt ,
11
13
UIContext ,
12
14
} from '@midscene/core' ;
13
15
import { elementByPositionWithElementInfo } from '@midscene/core/ai-model' ;
14
- import { uploadTestInfoToServer } from '@midscene/core/utils' ;
16
+ import { sleep , uploadTestInfoToServer } from '@midscene/core/utils' ;
15
17
import { MIDSCENE_REPORT_TAG_NAME , getAIConfig } from '@midscene/shared/env' ;
16
18
import type { ElementInfo } from '@midscene/shared/extractor' ;
17
19
import {
@@ -25,10 +27,12 @@ import { assert, logMsg, uuid } from '@midscene/shared/utils';
25
27
import dayjs from 'dayjs' ;
26
28
import type { Page as PlaywrightPage } from 'playwright' ;
27
29
import type { Page as PuppeteerPage } from 'puppeteer' ;
30
+ import type { AbstractPage } from '../page' ;
28
31
import { WebElementInfo , type WebUIContext } from '../web-element' ;
29
32
import type { WebPage } from './page' ;
30
33
import { debug as cacheDebug } from './task-cache' ;
31
34
import type { PageTaskExecutor } from './tasks' ;
35
+ import { getKeyCommands } from './ui-utils' ;
32
36
33
37
const debug = getDebug ( 'tool:profile' ) ;
34
38
@@ -343,3 +347,124 @@ export const parsePrompt = (
343
347
: undefined ,
344
348
} ;
345
349
} ;
350
+
351
/**
 * Builds the device actions shared by web pages, in this order:
 * Tap, RightClick, Hover, Input, KeyboardPress and Scroll.
 *
 * Each action's `call` handler drives the supplied `page` through its
 * `mouse`, `keyboard`, `clearInput` and `scroll*` members.
 *
 * @param page - The page the returned actions operate on.
 * @returns The list of action definitions bound to `page`.
 */
export const commonWebActionsForWebPage = <T extends AbstractPage>(
  page: T,
): DeviceAction[] => [
  {
    name: 'Tap',
    description: 'Tap the element',
    location: 'required',
    // Left-clicks the center of the located element.
    call: async (context) => {
      const { element } = context;
      assert(element, 'Element not found, cannot tap');
      await page.mouse.click(element.center[0], element.center[1], {
        button: 'left',
      });
    },
  },
  {
    name: 'RightClick',
    description: 'Right click the element',
    location: 'required',
    // Right-clicks the center of the located element.
    call: async (context) => {
      const { element } = context;
      assert(element, 'Element not found, cannot right click');
      await page.mouse.click(element.center[0], element.center[1], {
        button: 'right',
      });
    },
  },
  {
    name: 'Hover',
    description: 'Move the mouse to the element',
    location: 'required',
    // Moves the pointer to the center of the located element.
    call: async (context) => {
      const { element } = context;
      assert(element, 'Element not found, cannot hover');
      await page.mouse.move(element.center[0], element.center[1]);
    },
  },
  {
    name: 'Input',
    description: 'Replace the input field with a new value',
    paramSchema: '{ value: string }',
    paramDescription:
      '`value` is the final that should be filled in the input box. No matter what modifications are required, just provide the final value to replace the existing input value. Giving a blank string means clear the input field.',
    location: 'required',
    whatToLocate: 'The input field to be filled',
    call: async (context, param) => {
      const { element } = context;
      if (element) {
        await page.clearInput(element as unknown as ElementInfo);
      }

      // The emptiness check must run whether or not an element was located;
      // otherwise a missing `param` is dereferenced and throws a TypeError.
      const value = param?.value;
      if (!value) {
        return;
      }

      // Note: there is another implementation in AndroidDevicePage, which is more complex
      await page.keyboard.type(value);
    },
  } as DeviceAction<{ value: string }>,
  {
    name: 'KeyboardPress',
    description: 'Press a key',
    paramSchema: '{ value: string }',
    paramDescription: 'The key to be pressed',
    location: false,
    // Converts `param.value` with `getKeyCommands` and presses the result.
    call: async (context, param) => {
      const keys = getKeyCommands(param.value);
      await page.keyboard.press(keys as any); // TODO: fix this type error
    },
  } as DeviceAction<{ value: string }>,
  {
    name: 'Scroll',
    description: 'Scroll the page or an element',
    paramSchema:
      '{ direction: "down"(default) | "up" | "right" | "left", scrollType: "once" (default) | "untilBottom" | "untilTop" | "untilRight" | "untilLeft", distance: number | null }',
    paramDescription:
      'The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',
    location: 'optional',
    whatToLocate: 'The element to be scrolled',
    call: async (context, param) => {
      const { element } = context;
      // When an element was located, scrolling starts from its center;
      // otherwise no starting point is passed to the page.
      const startingPoint = element
        ? {
            left: element.center[0],
            top: element.center[1],
          }
        : undefined;
      const scrollToEventName = param?.scrollType;

      if (scrollToEventName === 'untilTop') {
        await page.scrollUntilTop(startingPoint);
      } else if (scrollToEventName === 'untilBottom') {
        await page.scrollUntilBottom(startingPoint);
      } else if (scrollToEventName === 'untilRight') {
        await page.scrollUntilRight(startingPoint);
      } else if (scrollToEventName === 'untilLeft') {
        await page.scrollUntilLeft(startingPoint);
      } else if (scrollToEventName === 'once' || !scrollToEventName) {
        // A missing or empty direction means "down"; a falsy distance
        // (null, 0, undefined) is passed on as undefined.
        const direction = param?.direction || 'down';
        const distance = param?.distance || undefined;

        if (direction === 'down') {
          await page.scrollDown(distance, startingPoint);
        } else if (direction === 'up') {
          await page.scrollUp(distance, startingPoint);
        } else if (direction === 'left') {
          await page.scrollLeft(distance, startingPoint);
        } else if (direction === 'right') {
          await page.scrollRight(distance, startingPoint);
        } else {
          throw new Error(`Unknown scroll direction: ${direction}`);
        }
        // until mouse event is done
        await sleep(500);
      } else {
        throw new Error(
          `Unknown scroll event type: ${scrollToEventName}, param: ${JSON.stringify(
            param,
          )}`,
        );
      }
    },
  } as DeviceAction<ScrollParam>,
];
0 commit comments