Skip to content

Commit b2adf2d

Browse files
authored
Merge pull request #51 from drivecore/better-browser
Better browser experience: show browser, take over user session, content filter, robustness improvements
2 parents 6eafae9 + 2ea1bfe commit b2adf2d

23 files changed

+622
-35
lines changed

packages/agent/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# mycoder-agent
22

3+
## 0.3.0
4+
5+
### Minor Changes
6+
7+
- Better browser experience: show the browser, take over the user's session, filter page content, and make browsing more robust.
8+
39
## 0.2.1
410

511
### Patch Changes

packages/agent/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "mycoder-agent",
3-
"version": "0.2.1",
3+
"version": "0.3.0",
44
"description": "Agent module for mycoder - an AI-powered software development assistant",
55
"type": "module",
66
"main": "dist/index.js",
@@ -45,10 +45,12 @@
4545
"license": "MIT",
4646
"dependencies": {
4747
"@anthropic-ai/sdk": "^0.37",
48+
"@mozilla/readability": "^0.5.0",
4849
"@playwright/test": "^1.50.1",
4950
"@vitest/browser": "^3.0.5",
5051
"chalk": "^5",
5152
"dotenv": "^16",
53+
"jsdom": "^26.0.0",
5254
"playwright": "^1.50.1",
5355
"uuid": "^11",
5456
"zod": "^3",

packages/agent/src/core/executeToolCall.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ export const executeToolCall = async (
2323
customPrefix: tool.logPrefix,
2424
});
2525

26-
const toolContext = {
26+
const toolContext: ToolContext = {
2727
...context,
2828
logger,
2929
};

packages/agent/src/core/toolAgent.respawn.test.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@ import { getTools } from '../../src/tools/getTools.js';
55
import { MockLogger } from '../utils/mockLogger.js';
66

77
import { TokenTracker } from './tokens.js';
8+
import { ToolContext } from './types.js';
89

9-
const toolContext = {
10+
const toolContext: ToolContext = {
1011
logger: new MockLogger(),
1112
headless: true,
1213
workingDirectory: '.',
14+
userSession: false,
15+
pageFilter: 'simple',
1316
tokenTracker: new TokenTracker(),
1417
};
1518
// Mock Anthropic SDK

packages/agent/src/core/toolAgent.test.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ import { MockLogger } from '../utils/mockLogger.js';
55
import { executeToolCall } from './executeToolCall.js';
66
import { TokenTracker } from './tokens.js';
77
import { toolAgent } from './toolAgent.js';
8-
import { Tool } from './types.js';
8+
import { Tool, ToolContext } from './types.js';
99

10-
const toolContext = {
10+
const toolContext: ToolContext = {
1111
logger: new MockLogger(),
1212
headless: true,
1313
workingDirectory: '.',
14+
userSession: false,
15+
pageFilter: 'simple',
1416
tokenTracker: new TokenTracker(),
1517
};
1618

packages/agent/src/core/types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@ import { TokenTracker } from './tokens.js';
66

77
export type TokenLevel = 'debug' | 'verbose' | 'info' | 'warn' | 'error';
88

9+
/**
 * Page-content processing mode carried on `ToolContext.pageFilter`.
 * The browser tools pass it to `filterPageContent` when reading a page.
 * NOTE(review): what each mode ('simple', 'none', 'readability') actually does
 * is defined in filterPageContent, which is not visible here — confirm there.
 */
export type pageFilter = 'simple' | 'none' | 'readability';
10+
911
export type ToolContext = {
1012
logger: Logger;
1113
workingDirectory: string;
1214
headless: boolean;
15+
userSession: boolean;
16+
pageFilter: pageFilter;
1317
tokenTracker: TokenTracker;
1418
};
1519

packages/agent/src/tools/browser/browseMessage.ts

Lines changed: 71 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ import { zodToJsonSchema } from 'zod-to-json-schema';
33

44
import { Tool } from '../../core/types.js';
55
import { errorToString } from '../../utils/errorToString.js';
6+
import { sleep } from '../../utils/sleep.js';
67

8+
import { filterPageContent } from './filterPageContent.js';
79
import { browserSessions, type BrowserAction, SelectorType } from './types.js';
810

911
// Schema for browser action
@@ -71,8 +73,29 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
7173
parameters: zodToJsonSchema(parameterSchema),
7274
returns: zodToJsonSchema(returnSchema),
7375

74-
execute: async ({ instanceId, action }, { logger }): Promise<ReturnType> => {
76+
execute: async (
77+
{ instanceId, action },
78+
{ logger, pageFilter },
79+
): Promise<ReturnType> => {
80+
// Validate action format
81+
if (!action || typeof action !== 'object') {
82+
logger.error('Invalid action format: action must be an object');
83+
return {
84+
status: 'error',
85+
error: 'Invalid action format: action must be an object',
86+
};
87+
}
88+
89+
if (!action.actionType) {
90+
logger.error('Invalid action format: actionType is required');
91+
return {
92+
status: 'error',
93+
error: 'Invalid action format: actionType is required',
94+
};
95+
}
96+
7597
logger.verbose(`Executing browser action: ${action.actionType}`);
98+
logger.verbose(`Webpage processing mode: ${pageFilter}`);
7699

77100
try {
78101
const session = browserSessions.get(instanceId);
@@ -87,10 +110,44 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
87110
if (!action.url) {
88111
throw new Error('URL required for goto action');
89112
}
90-
await page.goto(action.url, { waitUntil: 'networkidle' });
91-
const content = await page.content();
92-
logger.verbose('Navigation completed successfully');
93-
return { status: 'success', content };
113+
114+
try {
115+
// Try with 'domcontentloaded' first which is more reliable than 'networkidle'
116+
logger.verbose(
117+
`Navigating to ${action.url} with 'domcontentloaded' waitUntil`,
118+
);
119+
await page.goto(action.url, { waitUntil: 'domcontentloaded' });
120+
await sleep(3000);
121+
const content = await filterPageContent(page, pageFilter);
122+
logger.verbose(`Content: ${content}`);
123+
logger.verbose(
124+
'Navigation completed with domcontentloaded strategy',
125+
);
126+
logger.verbose(`Content length: ${content.length} characters`);
127+
return { status: 'success', content };
128+
} catch (navError) {
129+
// If that fails, try with no waitUntil option
130+
logger.warn(
131+
`Failed with domcontentloaded strategy: ${errorToString(navError)}`,
132+
);
133+
logger.verbose(
134+
`Retrying navigation to ${action.url} with no waitUntil option`,
135+
);
136+
137+
try {
138+
await page.goto(action.url);
139+
await sleep(3000);
140+
const content = await filterPageContent(page, pageFilter);
141+
logger.verbose(`Content: ${content}`);
142+
logger.verbose('Navigation completed with basic strategy');
143+
return { status: 'success', content };
144+
} catch (innerError) {
145+
logger.error(
146+
`Failed with basic navigation strategy: ${errorToString(innerError)}`,
147+
);
148+
throw innerError; // Re-throw to be caught by outer catch block
149+
}
150+
}
94151
}
95152

96153
case 'click': {
@@ -102,7 +159,8 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
102159
action.selectorType,
103160
);
104161
await page.click(clickSelector);
105-
const content = await page.content();
162+
await sleep(1000); // Wait for any content changes after click
163+
const content = await filterPageContent(page, pageFilter);
106164
logger.verbose(
107165
`Click action completed on selector: ${clickSelector}`,
108166
);
@@ -136,8 +194,9 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
136194
}
137195

138196
case 'content': {
139-
const content = await page.content();
197+
const content = await filterPageContent(page, pageFilter);
140198
logger.verbose('Page content retrieved successfully');
199+
logger.verbose(`Content length: ${content.length} characters`);
141200
return { status: 'success', content };
142201
}
143202

@@ -164,9 +223,12 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
164223
}
165224
},
166225

167-
logParameters: ({ action, description }, { logger }) => {
226+
logParameters: (
227+
{ action, description },
228+
{ logger, pageFilter = 'simple' },
229+
) => {
168230
logger.info(
169-
`Performing browser action: ${action.actionType}, ${description}`,
231+
`Performing browser action: ${action.actionType} with ${pageFilter} processing, ${description}`,
170232
);
171233
},
172234

packages/agent/src/tools/browser/browseStart.ts

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@ import { zodToJsonSchema } from 'zod-to-json-schema';
55

66
import { Tool } from '../../core/types.js';
77
import { errorToString } from '../../utils/errorToString.js';
8+
import { sleep } from '../../utils/sleep.js';
89

10+
import { filterPageContent } from './filterPageContent.js';
911
import { browserSessions } from './types.js';
1012

1113
const parameterSchema = z.object({
1214
url: z.string().url().optional().describe('Initial URL to navigate to'),
13-
1415
timeout: z
1516
.number()
1617
.optional()
@@ -40,23 +41,37 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
4041

4142
execute: async (
4243
{ url, timeout = 30000 },
43-
{ logger, headless = true },
44+
{ logger, headless, userSession, pageFilter },
4445
): Promise<ReturnType> => {
4546
logger.verbose(`Starting browser session${url ? ` at ${url}` : ''}`);
47+
logger.verbose(
48+
`User session mode: ${userSession ? 'enabled' : 'disabled'}`,
49+
);
50+
logger.verbose(`Webpage processing mode: ${pageFilter}`);
4651

4752
try {
4853
const instanceId = uuidv4();
4954

5055
// Launch browser
51-
const browser = await chromium.launch({
56+
const launchOptions = {
5257
headless,
53-
});
58+
};
59+
60+
// Use system Chrome installation if userSession is true
61+
if (userSession) {
62+
logger.verbose('Using system Chrome installation');
63+
// For Chrome, we use the channel option to specify Chrome
64+
launchOptions['channel'] = 'chrome';
65+
}
66+
67+
const browser = await chromium.launch(launchOptions);
5468

5569
// Create new context with default settings
5670
const context = await browser.newContext({
5771
viewport: null,
5872
userAgent:
5973
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
74+
serviceWorkers: 'block', // Block service workers which can cause continuous network activity
6075
});
6176

6277
// Create new page
@@ -80,11 +95,42 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
8095
// Navigate to URL if provided
8196
let content = '';
8297
if (url) {
83-
await page.goto(url, { waitUntil: 'networkidle' });
84-
content = await page.content();
98+
try {
99+
// Try with 'domcontentloaded' first which is more reliable than 'networkidle'
100+
logger.verbose(
101+
`Navigating to ${url} with 'domcontentloaded' waitUntil`,
102+
);
103+
await page.goto(url, { waitUntil: 'domcontentloaded', timeout });
104+
await sleep(3000);
105+
content = await filterPageContent(page, pageFilter);
106+
logger.verbose(`Content: ${content}`);
107+
logger.verbose('Navigation completed with domcontentloaded strategy');
108+
} catch (error) {
109+
// If that fails, try with no waitUntil option at all (most basic)
110+
logger.warn(
111+
`Failed with domcontentloaded strategy: ${errorToString(error)}`,
112+
);
113+
logger.verbose(
114+
`Retrying navigation to ${url} with no waitUntil option`,
115+
);
116+
117+
try {
118+
await page.goto(url, { timeout });
119+
await sleep(3000);
120+
content = await filterPageContent(page, pageFilter);
121+
logger.verbose(`Content: ${content}`);
122+
logger.verbose('Navigation completed with basic strategy');
123+
} catch (innerError) {
124+
logger.error(
125+
`Failed with basic navigation strategy: ${errorToString(innerError)}`,
126+
);
127+
throw innerError; // Re-throw to be caught by outer catch block
128+
}
129+
}
85130
}
86131

87132
logger.verbose('Browser session started successfully');
133+
logger.verbose(`Content length: ${content.length} characters`);
88134

89135
return {
90136
instanceId,
@@ -101,9 +147,9 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
101147
}
102148
},
103149

104-
logParameters: ({ url, description }, { logger }) => {
150+
logParameters: ({ url, description }, { logger, pageFilter = 'simple' }) => {
105151
logger.info(
106-
`Starting browser session${url ? ` at ${url}` : ''}, ${description}`,
152+
`Starting browser session${url ? ` at ${url}` : ''} with ${pageFilter} processing, ${description}`,
107153
);
108154
},
109155

0 commit comments

Comments
 (0)