Skip to content

Commit c0e647c

Browse files
quanru and yuyutaotao authored
feat(web-integration): reusing context param (#978)
* feat(web-integration): support force context para * feat(web-integration): update getElementXpath to use UIContext<BaseElement> * refactor(web-integration): support free context * refactor(web-integration): support freeze for all VQA api * feat(site): add freezePageContext and unfreezePageContext methods for improved performance * refactor(web-integration): remove redundant tests and streamline context handling in freeze/unfreeze functionality * docs(core): docs for context-freezing (#1013) * refactor(web-integration): remove isPageContextFrozen flag and simplify context management in PageAgent --------- Co-authored-by: yuyutaotao <[email protected]>
1 parent 32f4016 commit c0e647c

File tree

20 files changed

+556
-32
lines changed

20 files changed

+556
-32
lines changed

apps/chrome-extension/src/scripts/worker.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/// <reference types="chrome" />
22

3-
import type { WebUIContext } from '@midscene/web/utils';
3+
import type { WebUIContext } from '@midscene/web';
44

55
const workerMessageTypes = {
66
SAVE_CONTEXT: 'save-context',

apps/report/src/components/detail-panel.tsx

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ import type {
1313
} from '@midscene/core';
1414
import { filterBase64Value, timeStr } from '@midscene/visualizer';
1515
import { Blackboard, Player } from '@midscene/visualizer';
16-
import { Segmented } from 'antd';
16+
import type { WebUIContext } from '@midscene/web';
17+
import { Segmented, Tooltip } from 'antd';
1718
import { useEffect, useState } from 'react';
1819
import OpenInPlayground from './open-in-playground';
1920

@@ -49,6 +50,11 @@ const DetailPanel = (): JSX.Element => {
4950
const imageWidth = useExecutionDump((store) => store.insightWidth);
5051
const imageHeight = useExecutionDump((store) => store.insightHeight);
5152

53+
// Check if page context is frozen
54+
const isPageContextFrozen = Boolean(
55+
(activeTask?.pageContext as WebUIContext)?._isFrozen,
56+
);
57+
5258
let availableViewTypes = [VIEW_TYPE_SCREENSHOT, VIEW_TYPE_JSON];
5359
if (blackboardViewAvailable) {
5460
availableViewTypes = [
@@ -163,7 +169,11 @@ const DetailPanel = (): JSX.Element => {
163169
}
164170
if (type === VIEW_TYPE_BLACKBOARD) {
165171
return {
166-
label: 'Insight',
172+
label: isPageContextFrozen ? (
173+
<Tooltip title="Current pageContext is frozen">Insight 🧊</Tooltip>
174+
) : (
175+
'Insight'
176+
),
167177
value: type,
168178
icon: <ScheduleOutlined />,
169179
};

apps/report/src/components/detail-side.tsx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ const DetailSide = (): JSX.Element => {
226226
let taskInput: JSX.Element | null = null;
227227
if (task?.type === 'Planning') {
228228
const planningTask = task as ExecutionTaskPlanning;
229+
const isPageContextFrozen = Boolean((task?.pageContext as any)?._isFrozen);
229230
if (planningTask.param?.userInstruction) {
230231
taskInput = MetaKV({
231232
data: [
@@ -238,6 +239,14 @@ const DetailSide = (): JSX.Element => {
238239
key: 'log',
239240
content: planningTask.param.log,
240241
},
242+
...(isPageContextFrozen
243+
? [
244+
{
245+
key: 'context',
246+
content: <Tag color="blue">Frozen Context 🧊</Tag>,
247+
},
248+
]
249+
: []),
241250
],
242251
});
243252
} else {
@@ -248,10 +257,19 @@ const DetailSide = (): JSX.Element => {
248257
key: 'userPrompt',
249258
content: paramStr(task) || '',
250259
},
260+
...(isPageContextFrozen
261+
? [
262+
{
263+
key: 'context',
264+
content: <Tag color="blue">Frozen Context 🧊</Tag>,
265+
},
266+
]
267+
: []),
251268
],
252269
});
253270
}
254271
} else if (task?.type === 'Insight') {
272+
const isPageContextFrozen = Boolean((task?.pageContext as any)?._isFrozen);
255273
taskInput = MetaKV({
256274
data: [
257275
{ key: 'type', content: (task && typeStr(task)) || '' },
@@ -279,6 +297,14 @@ const DetailSide = (): JSX.Element => {
279297
},
280298
]
281299
: []),
300+
...(isPageContextFrozen
301+
? [
302+
{
303+
key: 'context',
304+
content: <Tag color="blue">Frozen Context 🧊</Tag>,
305+
},
306+
]
307+
: []),
282308
],
283309
});
284310
} else if (task?.type === 'Action') {

apps/report/src/components/open-in-playground.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import {
55
useEnvConfig,
66
useStaticPageAgent,
77
} from '@midscene/visualizer';
8-
import type { WebUIContext } from '@midscene/web/utils';
8+
import type { WebUIContext } from '@midscene/web';
99
import {
1010
Button,
1111
ConfigProvider,

apps/site/docs/en/api.mdx

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -722,27 +722,61 @@ await agent.logScreenshot('Login page', {
722722
});
723723
```
724724

725-
### `agent._unstableLogContent()`
725+
### `agent.freezePageContext()`
726726

727-
Retrieve the log content in JSON format from the report file. The structure of the log content may change in the future.
727+
Freeze the current page context, allowing all subsequent operations to reuse the same page snapshot without retrieving the page state repeatedly. This significantly improves performance when executing a large number of concurrent operations.
728+
729+
Some notes:
730+
* Usually, you do not need to use this method, unless you are certain that "context retrieval" is the bottleneck of your test script.
731+
* Call `agent.unfreezePageContext()` promptly once you are done, to restore the real-time page state.
732+
* Do not use this method around interaction operations: the AI model will be unable to perceive the latest page state, which causes confusing errors.
728733

729734
- Type
730735

731736
```typescript
732-
function _unstableLogContent(): Object;
737+
function freezePageContext(): Promise<void>;
733738
```
734739

735740
- Return Value:
736741

737-
- Returns an object containing the log content.
742+
- `Promise<void>`
738743

739744
- Examples:
740745

741746
```typescript
742-
const logContent = agent._unstableLogContent();
743-
console.log(logContent);
747+
// Freeze the page context
748+
await agent.freezePageContext();
749+
750+
// Some queries...
751+
const results = await Promise.all([
752+
agent.aiQuery('Username input box value'),
753+
agent.aiQuery('Password input box value'),
754+
agent.aiLocate('Login button'),
755+
]);
756+
console.log(results);
757+
758+
// Unfreeze the page context, subsequent operations will use real-time page state
759+
await agent.unfreezePageContext();
760+
```
761+
762+
:::tip
763+
In the report, operations using frozen context will display a 🧊 icon in the Insight tab.
764+
:::
765+
766+
### `agent.unfreezePageContext()`
767+
768+
Unfreeze the page context, restoring the use of real-time page state.
769+
770+
- Type
771+
772+
```typescript
773+
function unfreezePageContext(): Promise<void>;
744774
```
745775

776+
- Return Value:
777+
778+
- `Promise<void>`
779+
746780
## Properties
747781

748782
### `.reportFile`

apps/site/docs/zh/api.mdx

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,61 @@ await agent.logScreenshot('登录页面', {
714714
});
715715
```
716716

717+
### `agent.freezePageContext()`
718+
719+
冻结当前页面上下文,使后续所有的操作都复用同一个页面快照,避免多次重复获取页面状态。在执行大量并发操作时,它可以显著提升性能。
720+
721+
一些注意点:
722+
* 通常情况下,你不需要使用这个方法,除非你确定“页面状态获取”是脚本性能瓶颈。
723+
* 需要及时调用 `agent.unfreezePageContext()` 来恢复实时页面状态。
724+
* 不要在交互类操作中使用这个方法,它会让 AI 模型无法感知到页面的最新状态,产生令人困惑的错误。
725+
726+
- 类型
727+
728+
```typescript
729+
function freezePageContext(): Promise<void>;
730+
```
731+
732+
- 返回值:
733+
734+
- `Promise<void>`
735+
736+
- 示例:
737+
738+
```typescript
739+
// 冻结页面上下文,确保多个操作看到相同的页面状态
740+
await agent.freezePageContext();
741+
742+
// 执行一些操作...
743+
const results = await Promise.all([
744+
agent.aiQuery('Username input box value'),
745+
agent.aiQuery('Password input box value'),
746+
agent.aiLocate('Login button'),
747+
]);
748+
console.log(results);
749+
750+
// 解冻页面上下文
751+
await agent.unfreezePageContext();
752+
```
753+
754+
:::tip
755+
在报告中,使用冻结上下文的操作会在 Insight tab 中显示 🧊 图标。
756+
:::
757+
758+
### `agent.unfreezePageContext()`
759+
760+
解冻页面上下文,恢复使用实时的页面状态。
761+
762+
- 类型
763+
764+
```typescript
765+
function unfreezePageContext(): Promise<void>;
766+
```
767+
768+
- 返回值:
769+
770+
- `Promise<void>`
771+
717772
### `agent._unstableLogContent()`
718773

719774
从报告文件中获取日志内容。日志内容的结构可能会在未来发生变化。

packages/core/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,6 @@ export type {
2121
MidsceneYamlFlowItem,
2222
MidsceneYamlFlowItemAIRightClick,
2323
MidsceneYamlConfigResult,
24+
LocateOption,
25+
DetailedLocateParam,
2426
} from './yaml';

packages/core/src/yaml.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import type { PlanningActionParamScroll, Rect, TUserPrompt } from './types';
2+
import type { BaseElement, UIContext } from './types';
23

34
export interface LocateOption {
45
deepThink?: boolean; // only available in vl model
56
cacheable?: boolean; // user can set this param to false to disable the cache for a single agent api
67
xpath?: string; // only available in web
8+
pageContext?: UIContext<BaseElement>;
79
}
810

911
export interface InsightExtractOption {

packages/visualizer/src/component/describer.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import type {
55
Rect,
66
UIContext,
77
} from '@midscene/core';
8-
import type { WebUIContext } from '@midscene/web/utils';
8+
import type { WebUIContext } from '@midscene/web';
99
import { useEffect, useRef, useState } from 'react';
1010
import { useStaticPageAgent } from './playground/useStaticPageAgent';
1111
import './describer.less';

packages/visualizer/src/component/playground/playground-types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { GroupedActionDump, UIContext } from '@midscene/core';
2+
import type { WebUIContext } from '@midscene/web';
23
import type { ChromeExtensionProxyPageAgent } from '@midscene/web/chrome-extension';
34
import type { StaticPageAgent } from '@midscene/web/playground';
4-
import type { WebUIContext } from '@midscene/web/utils';
55

66
// result type
77
export interface PlaygroundResult {

0 commit comments

Comments
 (0)