Skip to content

Commit 7c4c46c

Browse files
authored
chore: share contexts between agent calls (#38738)
1 parent 03a996f commit 7c4c46c

File tree

12 files changed

+166
-145
lines changed

12 files changed

+166
-145
lines changed

packages/playwright-client/types/types.d.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5281,7 +5281,7 @@ export interface PageAgent {
52815281
* @param schema
52825282
* @param options
52835283
*/
5284-
extract<Schema extends ZodTypeAny>(query: string, schema: Schema): Promise<ZodInfer<Schema>>;
5284+
extract<Schema extends ZodTypeAny>(query: string, schema: Schema): Promise<{ result: ZodInfer<Schema>, usage: { turns: number, inputTokens: number, outputTokens: number } }>;
52855285
/**
52865286
* Emitted when the agent makes a turn.
52875287
*/

packages/playwright-core/src/client/pageAgent.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ export class PageAgent extends ChannelOwner<channels.PageAgentChannel> implement
5151
return { usage };
5252
}
5353

54-
async extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: PageAgentOptions = {}): Promise<z.infer<Schema>> {
54+
async extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: PageAgentOptions = {}): Promise<{ result: z.infer<Schema>, usage: channels.AgentUsage }> {
5555
const { result, usage } = await this._channel.extract({ query, schema: this._page._platform.zodToJsonSchema(schema), ...options });
5656
return { result, usage };
5757
}

packages/playwright-core/src/server/agent/context.ts

Lines changed: 33 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -27,47 +27,53 @@ import type { Language } from '../../utils/isomorphic/locatorGenerators.ts';
2727
import type { ToolDefinition } from './tool';
2828
import type * as channels from '@protocol/channels';
2929

30+
31+
type HistoryItem = {
32+
type: 'expect' | 'perform' | 'extract';
33+
description: string;
34+
};
3035
export class Context {
3136
readonly page: Page;
3237
readonly actions: actions.ActionWithCode[] = [];
3338
readonly sdkLanguage: Language;
34-
readonly progress: Progress;
35-
readonly options: channels.PageAgentParams;
36-
private _callIntent: string | undefined;
39+
readonly agentParams: channels.PageAgentParams;
40+
readonly events: loopTypes.LoopEvents;
41+
private _currentCallIntent: string | undefined;
42+
readonly history: HistoryItem[] = [];
3743

38-
constructor(apiCallProgress: Progress, page: Page, options: channels.PageAgentParams) {
39-
this.progress = apiCallProgress;
44+
constructor(page: Page, agentParms: channels.PageAgentParams, events: loopTypes.LoopEvents) {
4045
this.page = page;
41-
this.options = options;
46+
this.agentParams = agentParms;
4247
this.sdkLanguage = page.browserContext._browser.sdkLanguage();
48+
this.events = events;
4349
}
4450

45-
async callTool(tool: ToolDefinition, params: any, options: { intent?: string }) {
46-
this._callIntent = options.intent;
51+
async callTool(progress: Progress, tool: ToolDefinition, params: any, options: { intent?: string }) {
52+
this._currentCallIntent = options.intent;
4753
try {
48-
return await tool.handle(this, params);
54+
return await tool.handle(progress, this, params);
4955
} finally {
50-
this._callIntent = undefined;
56+
this._currentCallIntent = undefined;
5157
}
5258
}
5359

54-
async runActionAndWait(action: actions.Action) {
55-
return await this.runActionsAndWait([action]);
60+
async runActionAndWait(progress: Progress, action: actions.Action) {
61+
return await this.runActionsAndWait(progress, [action]);
5662
}
5763

58-
async runActionsAndWait(action: actions.Action[]) {
59-
const error = await this.waitForCompletion(async () => {
64+
async runActionsAndWait(progress: Progress, action: actions.Action[]) {
65+
const error = await this.waitForCompletion(progress, async () => {
6066
for (const a of action) {
61-
await runAction(this.progress, 'generate', this.page, a, this.options?.secrets ?? []);
67+
await runAction(progress, 'generate', this.page, a, this.agentParams?.secrets ?? []);
6268
const code = await generateCode(this.sdkLanguage, a);
63-
this.actions.push({ ...a, code, intent: this._callIntent });
69+
this.actions.push({ ...a, code, intent: this._currentCallIntent });
6470
}
6571
return undefined;
6672
}).catch((error: Error) => error);
67-
return await this.snapshotResult(error);
73+
return await this.snapshotResult(progress, error);
6874
}
6975

70-
async waitForCompletion<R>(callback: () => Promise<R>): Promise<R> {
76+
async waitForCompletion<R>(progress: Progress, callback: () => Promise<R>): Promise<R> {
7177
const requests: Request[] = [];
7278
const requestListener = (request: Request) => requests.push(request);
7379
const disposeListeners = () => {
@@ -78,14 +84,14 @@ export class Context {
7884
let result: R;
7985
try {
8086
result = await callback();
81-
await this.progress.wait(500);
87+
await progress.wait(500);
8288
} finally {
8389
disposeListeners();
8490
}
8591

8692
const requestedNavigation = requests.some(request => request.isNavigationRequest());
8793
if (requestedNavigation) {
88-
await this.page.mainFrame().waitForLoadState(this.progress, 'load');
94+
await this.page.mainFrame().waitForLoadState(progress, 'load');
8995
return result;
9096
}
9197

@@ -96,15 +102,15 @@ export class Context {
96102
else
97103
promises.push(request.response());
98104
}
99-
await this.progress.race(promises, { timeout: 5000 });
105+
await progress.race(promises, { timeout: 5000 });
100106
if (requests.length)
101-
await this.progress.wait(500);
107+
await progress.wait(500);
102108

103109
return result;
104110
}
105111

106-
async snapshotResult(error?: Error): Promise<loopTypes.ToolResult> {
107-
let { full } = await this.page.snapshotForAI(this.progress);
112+
async snapshotResult(progress: Progress, error?: Error): Promise<loopTypes.ToolResult> {
113+
let { full } = await this.page.snapshotForAI(progress);
108114
full = this._redactText(full);
109115

110116
const text: string[] = [];
@@ -130,10 +136,10 @@ export class Context {
130136
};
131137
}
132138

133-
async refSelectors(params: { element: string, ref: string }[]): Promise<string[]> {
139+
async refSelectors(progress: Progress, params: { element: string, ref: string }[]): Promise<string[]> {
134140
return Promise.all(params.map(async param => {
135141
try {
136-
const { resolvedSelector } = await this.page.mainFrame().resolveSelector(this.progress, `aria-ref=${param.ref}`);
142+
const { resolvedSelector } = await this.page.mainFrame().resolveSelector(progress, `aria-ref=${param.ref}`);
137143
return resolvedSelector;
138144
} catch (e) {
139145
throw new Error(`Ref ${param.ref} not found in the current page snapshot. Try capturing new snapshot.`);
@@ -142,7 +148,7 @@ export class Context {
142148
}
143149

144150
private _redactText(text: string): string {
145-
const secrets = this.options?.secrets;
151+
const secrets = this.agentParams?.secrets;
146152
if (!secrets)
147153
return text;
148154

packages/playwright-core/src/server/agent/expectTools.ts

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@ const expectVisible = defineTool({
3333
}),
3434
},
3535

36-
handle: async (context, params) => {
37-
return await context.runActionAndWait({
36+
handle: async (progress, context, params) => {
37+
return await context.runActionAndWait(progress, {
3838
method: 'expectVisible',
3939
selector: getByRoleSelector(params.role, { name: params.accessibleName }),
4040
isNot: params.isNot,
@@ -53,8 +53,8 @@ const expectVisibleText = defineTool({
5353
}),
5454
},
5555

56-
handle: async (context, params) => {
57-
return await context.runActionAndWait({
56+
handle: async (progress, context, params) => {
57+
return await context.runActionAndWait(progress, {
5858
method: 'expectVisible',
5959
selector: getByTextSelector(params.text),
6060
isNot: params.isNot,
@@ -76,9 +76,9 @@ const expectValue = defineTool({
7676
}),
7777
},
7878

79-
handle: async (context, params) => {
80-
const [selector] = await context.refSelectors([{ ref: params.ref, element: params.element }]);
81-
return await context.runActionAndWait({
79+
handle: async (progress, context, params) => {
80+
const [selector] = await context.refSelectors(progress, [{ ref: params.ref, element: params.element }]);
81+
return await context.runActionAndWait(progress, {
8282
method: 'expectValue',
8383
selector,
8484
type: params.type,
@@ -102,10 +102,10 @@ const expectList = defineTool({
102102
}),
103103
},
104104

105-
handle: async (context, params) => {
105+
handle: async (progress, context, params) => {
106106
const template = `- ${params.listRole}:
107-
${params.items.map(item => ` - ${params.itemRole}: ${yamlEscapeValueIfNeeded(item)}`).join('\n')}`;
108-
return await context.runActionAndWait({
107+
progress, ${params.items.map(item => ` - ${params.itemRole}: ${yamlEscapeValueIfNeeded(item)}`).join('\n')}`;
108+
return await context.runActionAndWait(progress, {
109109
method: 'expectAria',
110110
template,
111111
});

packages/playwright-core/src/server/agent/pageAgent.ts

Lines changed: 55 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -24,14 +24,20 @@ import { Context } from './context';
2424
import performTools from './performTools';
2525
import expectTools from './expectTools';
2626

27-
import type * as channels from '@protocol/channels';
2827
import type * as actions from './actions';
2928
import type { ToolDefinition } from './tool';
3029
import type * as loopTypes from '@lowire/loop';
30+
import type { Progress } from '../progress';
3131

32-
export async function pageAgentPerform(context: Context, options: loopTypes.LoopEvents & channels.PageAgentPerformParams) {
33-
const cacheKey = (options.cacheKey ?? options.task).trim();
34-
if (await cachedPerform(context, cacheKey))
32+
export type CallParams = {
33+
cacheKey?: string;
34+
maxTokens?: number;
35+
maxTurns?: number;
36+
};
37+
38+
export async function pageAgentPerform(progress: Progress, context: Context, userTask: string, callParams: CallParams) {
39+
const cacheKey = (callParams.cacheKey ?? userTask).trim();
40+
if (await cachedPerform(progress, context, cacheKey))
3541
return;
3642

3743
const task = `
@@ -40,16 +46,17 @@ export async function pageAgentPerform(context: Context, options: loopTypes.Loop
4046
- Your reply should be a tool call that performs action the page".
4147
4248
### Task
43-
${options.task}
49+
${userTask}
4450
`;
4551

46-
await runLoop(context, performTools, task, undefined, options);
52+
await runLoop(progress, context, performTools, task, undefined, callParams);
53+
context.history.push({ type: 'perform', description: userTask });
4754
await updateCache(context, cacheKey);
4855
}
4956

50-
export async function pageAgentExpect(context: Context, options: loopTypes.LoopEvents & channels.PageAgentExpectParams) {
51-
const cacheKey = (options.cacheKey ?? options.expectation).trim();
52-
if (await cachedPerform(context, cacheKey))
57+
export async function pageAgentExpect(progress: Progress, context: Context, expectation: string, callParams: CallParams) {
58+
const cacheKey = (callParams.cacheKey ?? expectation).trim();
59+
if (await cachedPerform(progress, context, cacheKey))
5360
return;
5461

5562
const task = `
@@ -58,76 +65,86 @@ export async function pageAgentExpect(context: Context, options: loopTypes.LoopE
5865
- You can call exactly one tool and it can't be report_results, must be one of the assertion tools.
5966
6067
### Expectation
61-
${options.expectation}
68+
${expectation}
6269
`;
6370

64-
await runLoop(context, expectTools, task, undefined, options);
71+
await runLoop(progress, context, expectTools, task, undefined, callParams);
72+
context.history.push({ type: 'expect', description: expectation });
6573
await updateCache(context, cacheKey);
6674
}
6775

68-
export async function runLoop(context: Context, toolDefinitions: ToolDefinition[], userTask: string, resultSchema: loopTypes.Schema | undefined, options: loopTypes.LoopEvents & {
69-
api?: string,
70-
apiEndpoint?: string,
71-
apiKey?: string,
72-
model?: string,
73-
maxTurns?: number;
74-
maxTokens?: number;
75-
}): Promise<{
76+
export async function pageAgentExtract(progress: Progress, context: Context, query: string, schema: loopTypes.Schema, callParams: CallParams): Promise<any> {
77+
78+
const task = `
79+
### Instructions
80+
Extract the following information from the page. Do not perform any actions, just extract the information.
81+
82+
### Query
83+
${query}`;
84+
const { result } = await runLoop(progress, context, [], task, schema, callParams);
85+
context.history.push({ type: 'extract', description: query });
86+
return result;
87+
}
88+
89+
async function runLoop(progress: Progress, context: Context, toolDefinitions: ToolDefinition[], userTask: string, resultSchema: loopTypes.Schema | undefined, params: CallParams): Promise<{
7690
result: any
7791
}> {
7892
const { page } = context;
93+
if (!context.agentParams.api || !context.agentParams.apiKey || !context.agentParams.model)
94+
throw new Error(`This action requires the API and API key to be set on the page agent. Are you running with --run-agents=none mode?`);
7995

80-
if (!context.options?.api || !context.options?.apiKey || !context.options?.model)
81-
throw new Error(`This action requires the API and API key to be set on the browser context`);
82-
83-
const { full } = await page.snapshotForAI(context.progress);
84-
const { tools, callTool } = toolsForLoop(context, toolDefinitions, { resultSchema });
96+
const { full } = await page.snapshotForAI(progress);
97+
const { tools, callTool, reportedResult } = toolsForLoop(progress, context, toolDefinitions, { resultSchema });
8598

8699
const loop = new Loop({
87-
api: context.options.api as any,
88-
apiEndpoint: context.options.apiEndpoint,
89-
apiKey: context.options.apiKey,
90-
model: context.options.model,
91-
maxTurns: context.options.maxTurns,
92-
maxTokens: context.options.maxTokens,
100+
api: context.agentParams.api as any,
101+
apiEndpoint: context.agentParams.apiEndpoint,
102+
apiKey: context.agentParams.apiKey,
103+
model: context.agentParams.model,
104+
maxTurns: params.maxTurns ?? context.agentParams.maxTurns,
105+
maxTokens: params.maxTokens ?? context.agentParams.maxTokens,
93106
summarize: true,
94107
debug,
95108
callTool,
96109
tools,
110+
...context.events,
97111
});
98112

99113
const task = `${userTask}
100114
115+
### Context history
116+
${context.history.map(h => `- ${h.type}: ${h.description}`).join('\n')}
117+
101118
### Page snapshot
102119
${full}
103120
`;
104121

105-
const { result } = await loop.run(task);
106-
return { result };
122+
await loop.run(task);
123+
return { result: resultSchema ? reportedResult() : undefined };
107124
}
108125

109126
type CachedActions = Record<string, {
110127
timestamp: number,
111128
actions: actions.ActionWithCode[],
112129
}>;
113130

114-
async function cachedPerform(context: Context, cacheKey: string): Promise<actions.ActionWithCode[] | undefined> {
115-
if (!context.options?.cacheFile)
131+
async function cachedPerform(progress: Progress, context: Context, cacheKey: string): Promise<actions.ActionWithCode[] | undefined> {
132+
if (!context.agentParams?.cacheFile)
116133
return;
117134

118-
const cache = await cachedActions(context.options?.cacheFile);
135+
const cache = await cachedActions(context.agentParams?.cacheFile);
119136
const entry = cache.actions[cacheKey];
120137
if (!entry)
121138
return;
122139

123140
for (const action of entry.actions)
124-
await runAction(context.progress, 'run', context.page, action, context.options.secrets ?? []);
141+
await runAction(progress, 'run', context.page, action, context.agentParams.secrets ?? []);
125142
return entry.actions;
126143
}
127144

128145
async function updateCache(context: Context, cacheKey: string) {
129-
const cacheFile = context.options?.cacheFile;
130-
const cacheOutFile = context.options?.cacheOutFile;
146+
const cacheFile = context.agentParams?.cacheFile;
147+
const cacheOutFile = context.agentParams?.cacheOutFile;
131148
const cacheFileKey = cacheFile ?? cacheOutFile;
132149

133150
const cache = cacheFileKey ? await cachedActions(cacheFileKey) : { actions: {}, newActions: {} };

0 commit comments

Comments
 (0)