Skip to content

Commit 149e6c9

Browse files
committed
Fix shared browser pool issues for per-engine instances
1 parent 92279bf commit 149e6c9

File tree

4 files changed

+231
-54
lines changed

4 files changed

+231
-54
lines changed

README.md

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@ The server provides three specialised tools for different web search needs:
1616

1717
### 1. `full-web-search` (Main Tool)
1818
When a comprehensive search is requested, the server uses an **optimized search strategy**:
19-
1. **Browser-based Bing Search** - Primary method using Playwright
20-
2. **Browser-based Brave Search** - Secondary option
19+
1. **Browser-based Bing Search** - Primary method using dedicated Chromium instance
20+
2. **Browser-based Brave Search** - Secondary option using dedicated Firefox instance
2121
3. **Axios DuckDuckGo Search** - Final fallback using traditional HTTP
22-
4. **Content extraction**: Tries axios first, then falls back to browser with human behavior simulation
23-
5. **Concurrent processing**: Extracts content from multiple pages simultaneously with timeout protection
24-
6. **HTTP/2 error recovery**: Automatically falls back to HTTP/1.1 when protocol errors occur
22+
4. **Dedicated browser isolation**: Each search engine gets its own browser instance with automatic cleanup
23+
5. **Content extraction**: Tries axios first, then falls back to browser with human behavior simulation
24+
6. **Concurrent processing**: Extracts content from multiple pages simultaneously with timeout protection
25+
7. **HTTP/2 error recovery**: Automatically falls back to HTTP/1.1 when protocol errors occur
2526

2627
### 2. `get-web-search-summaries` (Lightweight Alternative)
2728
For quick search results without full content extraction:
@@ -124,6 +125,7 @@ The server supports several environment variables for configuration:
124125
- **`ENABLE_RELEVANCE_CHECKING`**: Enable/disable search result quality validation (default: true)
125126
- **`RELEVANCE_THRESHOLD`**: Minimum quality score for search results (0.0-1.0, default: 0.3)
126127
- **`FORCE_MULTI_ENGINE_SEARCH`**: Try all search engines and return best results (default: false)
128+
- **`DEBUG_BROWSER_LIFECYCLE`**: Enable detailed browser lifecycle logging for debugging (default: false)
127129

128130
## Troubleshooting
129131

@@ -146,6 +148,15 @@ The server supports several environment variables for configuration:
146148
- **Force multi-engine search**: Set `FORCE_MULTI_ENGINE_SEARCH=true` to try all engines and return the best results
147149
- **Disable quality checking**: Set `ENABLE_RELEVANCE_CHECKING=false` to disable validation (not recommended)
148150

151+
### Browser Context Issues
152+
- **"Target page, context or browser has been closed" errors**: Eliminated by the dedicated browser architecture
153+
- **Zero browser sharing**: Each search engine creates and manages its own dedicated browser instance
154+
- **Automatic cleanup**: Each browser is automatically closed after its search completes
155+
- **Retry mechanism**: 2-attempt retry with fresh browser instances on failures
156+
- **Enhanced reliability**: Cross-contamination between search engines is no longer possible
157+
- **Firefox isMobile compatibility**: Robust Firefox detection to prevent unsupported option errors
158+
- **Simplified architecture**: Removed complex browser pooling in favor of dedicated instances
159+
149160
### Memory Usage
150161
- **Automatic cleanup**: Browsers are automatically cleaned up after each operation to prevent memory leaks
151162
- **Limit browsers**: Reduce `MAX_BROWSERS` (default: 3)

src/browser-pool.ts

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,26 @@ export class BrowserPool {
2929
if (this.browsers.has(browserType)) {
3030
const browser = this.browsers.get(browserType)!;
3131

32-
// Check if browser is still connected
32+
// Check if browser is still connected and healthy
3333
try {
3434
if (browser.isConnected()) {
35+
// Quick health check by trying to create and close a context
36+
// Use minimal options to avoid Firefox isMobile issues
37+
const testContext = await browser.newContext({
38+
userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
39+
});
40+
await testContext.close();
3541
return browser;
3642
}
3743
} catch (error) {
38-
console.log(`[BrowserPool] Browser ${browserType} disconnected:`, error);
39-
// Browser is disconnected, remove it
44+
console.log(`[BrowserPool] Browser ${browserType} health check failed:`, error);
45+
// Browser is unhealthy, remove it and close if possible
4046
this.browsers.delete(browserType);
47+
try {
48+
await browser.close();
49+
} catch (closeError) {
50+
console.log(`[BrowserPool] Error closing unhealthy browser:`, closeError);
51+
}
4152
}
4253
}
4354

src/enhanced-content-extractor.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,12 @@ export class EnhancedContentExtractor {
104104
hasTouch: Math.random() > 0.7,
105105
};
106106

107-
// Firefox doesn't support isMobile option
108-
const contextOptions = browserType === 'firefox'
107+
// Firefox doesn't support isMobile option - check multiple ways to ensure detection
108+
const isFirefox = browserType === 'firefox' ||
109+
browserType.includes('firefox') ||
110+
browser.constructor.name.toLowerCase().includes('firefox');
111+
112+
const contextOptions = isFirefox
109113
? baseContextOptions
110114
: { ...baseContextOptions, isMobile: Math.random() > 0.8 };
111115

src/search-engine.ts

Lines changed: 195 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ export class SearchEngine {
2828
const enableQualityCheck = process.env.ENABLE_RELEVANCE_CHECKING !== 'false';
2929
const qualityThreshold = parseFloat(process.env.RELEVANCE_THRESHOLD || '0.3');
3030
const forceMultiEngine = process.env.FORCE_MULTI_ENGINE_SEARCH === 'true';
31+
const debugBrowsers = process.env.DEBUG_BROWSER_LIFECYCLE === 'true';
3132

32-
console.log(`[SearchEngine] Quality checking: ${enableQualityCheck}, threshold: ${qualityThreshold}, multi-engine: ${forceMultiEngine}`);
33+
console.log(`[SearchEngine] Quality checking: ${enableQualityCheck}, threshold: ${qualityThreshold}, multi-engine: ${forceMultiEngine}, debug: ${debugBrowsers}`);
3334

3435
// Try multiple approaches to get search results, starting with most reliable
3536
const approaches = [
@@ -42,8 +43,11 @@ export class SearchEngine {
4243
let bestEngine = 'None';
4344
let bestQuality = 0;
4445

45-
for (const approach of approaches) {
46+
for (let i = 0; i < approaches.length; i++) {
47+
const approach = approaches[i];
4648
try {
49+
console.log(`[SearchEngine] Attempting ${approach.name} (${i + 1}/${approaches.length})...`);
50+
4751
// Use more aggressive timeouts for faster fallback
4852
const approachTimeout = Math.min(timeout / 3, 4000); // Max 4 seconds per approach for faster fallback
4953
const results = await approach.method(sanitizedQuery, numResults, approachTimeout);
@@ -74,7 +78,7 @@ export class SearchEngine {
7478
}
7579

7680
// If this is the last engine or quality is acceptable, prepare to return
77-
if (approach === approaches[approaches.length - 1]) {
81+
if (i === approaches.length - 1) {
7882
if (bestQuality >= qualityThreshold || !enableQualityCheck) {
7983
console.log(`[SearchEngine] Using best results from ${bestEngine} (quality: ${bestQuality.toFixed(2)})`);
8084
return { results: bestResults, engine: bestEngine };
@@ -88,6 +92,9 @@ export class SearchEngine {
8892
}
8993
} catch (error) {
9094
console.error(`[SearchEngine] ${approach.name} approach failed:`, error);
95+
96+
// Handle browser-specific errors (no cleanup needed since each engine uses dedicated browsers)
97+
await this.handleBrowserError(error, approach.name);
9198
}
9299
}
93100

@@ -111,9 +118,52 @@ export class SearchEngine {
111118

112119

113120
private async tryBrowserBraveSearch(query: string, numResults: number, timeout: number): Promise<SearchResult[]> {
114-
console.log(`[SearchEngine] Trying browser-based Brave search...`);
121+
console.log(`[SearchEngine] Trying browser-based Brave search with dedicated browser...`);
122+
123+
// Try with retry mechanism
124+
for (let attempt = 1; attempt <= 2; attempt++) {
125+
let browser;
126+
try {
127+
// Create a dedicated browser instance for Brave search only
128+
const { firefox } = await import('playwright');
129+
browser = await firefox.launch({
130+
headless: process.env.BROWSER_HEADLESS !== 'false',
131+
args: [
132+
'--no-sandbox',
133+
'--disable-dev-shm-usage',
134+
],
135+
});
136+
137+
console.log(`[SearchEngine] Brave search attempt ${attempt}/2 with fresh browser`);
138+
const results = await this.tryBrowserBraveSearchInternal(browser, query, numResults, timeout);
139+
return results;
140+
} catch (error) {
141+
console.error(`[SearchEngine] Brave search attempt ${attempt}/2 failed:`, error);
142+
if (attempt === 2) {
143+
throw error; // Re-throw on final attempt
144+
}
145+
// Small delay before retry
146+
await new Promise(resolve => setTimeout(resolve, 500));
147+
} finally {
148+
// Always close the dedicated browser
149+
if (browser) {
150+
try {
151+
await browser.close();
152+
} catch (closeError) {
153+
console.log(`[SearchEngine] Error closing Brave browser:`, closeError);
154+
}
155+
}
156+
}
157+
}
115158

116-
const browser = await this.browserPool.getBrowser();
159+
throw new Error('All Brave search attempts failed');
160+
}
161+
162+
private async tryBrowserBraveSearchInternal(browser: any, query: string, numResults: number, timeout: number): Promise<SearchResult[]> {
163+
// Validate browser is still functional before proceeding
164+
if (!browser.isConnected()) {
165+
throw new Error('Browser is not connected');
166+
}
117167

118168
try {
119169
const context = await browser.newContext({
@@ -123,45 +173,95 @@ export class SearchEngine {
123173
timezoneId: 'America/New_York',
124174
});
125175

126-
const page = await context.newPage();
127-
128-
// Navigate to Brave search
129-
const searchUrl = `https://search.brave.com/search?q=${encodeURIComponent(query)}&source=web`;
130-
console.log(`[SearchEngine] Browser navigating to Brave: ${searchUrl}`);
131-
132-
await page.goto(searchUrl, {
133-
waitUntil: 'domcontentloaded',
134-
timeout: timeout
135-
});
136-
137-
// Wait for search results to load
138176
try {
139-
await page.waitForSelector('[data-type="web"]', { timeout: 3000 });
140-
} catch {
141-
console.log(`[SearchEngine] Browser Brave results selector not found, proceeding anyway`);
142-
}
177+
const page = await context.newPage();
178+
179+
// Navigate to Brave search
180+
const searchUrl = `https://search.brave.com/search?q=${encodeURIComponent(query)}&source=web`;
181+
console.log(`[SearchEngine] Browser navigating to Brave: ${searchUrl}`);
182+
183+
await page.goto(searchUrl, {
184+
waitUntil: 'domcontentloaded',
185+
timeout: timeout
186+
});
143187

144-
// Get the page content
145-
const html = await page.content();
146-
147-
await context.close();
148-
149-
console.log(`[SearchEngine] Browser Brave got HTML with length: ${html.length}`);
150-
151-
const results = this.parseBraveResults(html, numResults);
152-
console.log(`[SearchEngine] Browser Brave parsed ${results.length} results`);
153-
154-
return results;
188+
// Wait for search results to load
189+
try {
190+
await page.waitForSelector('[data-type="web"]', { timeout: 3000 });
191+
} catch {
192+
console.log(`[SearchEngine] Browser Brave results selector not found, proceeding anyway`);
193+
}
194+
195+
// Get the page content
196+
const html = await page.content();
197+
198+
console.log(`[SearchEngine] Browser Brave got HTML with length: ${html.length}`);
199+
200+
const results = this.parseBraveResults(html, numResults);
201+
console.log(`[SearchEngine] Browser Brave parsed ${results.length} results`);
202+
203+
await context.close();
204+
return results;
205+
} catch (error) {
206+
// Ensure context is closed even on error
207+
await context.close();
208+
throw error;
209+
}
155210
} catch (error) {
156211
console.error(`[SearchEngine] Browser Brave search failed:`, error);
157212
throw error;
158213
}
159214
}
160215

161216
private async tryBrowserBingSearch(query: string, numResults: number, timeout: number): Promise<SearchResult[]> {
162-
console.log(`[SearchEngine] Trying browser-based Bing search...`);
217+
console.log(`[SearchEngine] Trying browser-based Bing search with dedicated browser...`);
218+
219+
// Try with retry mechanism
220+
for (let attempt = 1; attempt <= 2; attempt++) {
221+
let browser;
222+
try {
223+
// Create a dedicated browser instance for Bing search only
224+
const { chromium } = await import('playwright');
225+
browser = await chromium.launch({
226+
headless: process.env.BROWSER_HEADLESS !== 'false',
227+
args: [
228+
'--no-sandbox',
229+
'--disable-blink-features=AutomationControlled',
230+
'--disable-dev-shm-usage',
231+
'--disable-gpu',
232+
],
233+
});
234+
235+
console.log(`[SearchEngine] Bing search attempt ${attempt}/2 with fresh browser`);
236+
const results = await this.tryBrowserBingSearchInternal(browser, query, numResults, timeout);
237+
return results;
238+
} catch (error) {
239+
console.error(`[SearchEngine] Bing search attempt ${attempt}/2 failed:`, error);
240+
if (attempt === 2) {
241+
throw error; // Re-throw on final attempt
242+
}
243+
// Small delay before retry
244+
await new Promise(resolve => setTimeout(resolve, 500));
245+
} finally {
246+
// Always close the dedicated browser
247+
if (browser) {
248+
try {
249+
await browser.close();
250+
} catch (closeError) {
251+
console.log(`[SearchEngine] Error closing Bing browser:`, closeError);
252+
}
253+
}
254+
}
255+
}
163256

164-
const browser = await this.browserPool.getBrowser();
257+
throw new Error('All Bing search attempts failed');
258+
}
259+
260+
private async tryBrowserBingSearchInternal(browser: any, query: string, numResults: number, timeout: number): Promise<SearchResult[]> {
261+
// Validate browser is still functional before proceeding
262+
if (!browser.isConnected()) {
263+
throw new Error('Browser is not connected');
264+
}
165265

166266
try {
167267
// Enhanced browser context with more realistic fingerprinting
@@ -188,18 +288,24 @@ export class SearchEngine {
188288

189289
const page = await context.newPage();
190290

191-
// Try enhanced Bing search with proper web interface flow
192291
try {
193-
const results = await this.tryEnhancedBingSearch(page, query, numResults, timeout);
194-
await context.close();
195-
return results;
196-
} catch (enhancedError) {
197-
console.log(`[SearchEngine] Enhanced Bing search failed, trying fallback: ${enhancedError instanceof Error ? enhancedError.message : 'Unknown error'}`);
198-
199-
// Fallback to direct URL approach with enhanced parameters
200-
const results = await this.tryDirectBingSearch(page, query, numResults, timeout);
292+
// Try enhanced Bing search with proper web interface flow
293+
try {
294+
const results = await this.tryEnhancedBingSearch(page, query, numResults, timeout);
295+
await context.close();
296+
return results;
297+
} catch (enhancedError) {
298+
console.log(`[SearchEngine] Enhanced Bing search failed, trying fallback: ${enhancedError instanceof Error ? enhancedError.message : 'Unknown error'}`);
299+
300+
// Fallback to direct URL approach with enhanced parameters
301+
const results = await this.tryDirectBingSearch(page, query, numResults, timeout);
302+
await context.close();
303+
return results;
304+
}
305+
} catch (error) {
306+
// Ensure context is closed even on error
201307
await context.close();
202-
return results;
308+
throw error;
203309
}
204310
} catch (error) {
205311
console.error(`[SearchEngine] Browser Bing search failed:`, error);
@@ -885,6 +991,51 @@ export class SearchEngine {
885991
return averageScore;
886992
}
887993

994+
/**
 * Probes a browser instance to decide whether it can still be used.
 *
 * The browser counts as healthy when it reports being connected and a
 * throwaway context can be opened and closed on it. Any exception raised
 * during the probe is logged and reported as "unhealthy" rather than
 * propagated.
 *
 * Step-by-step tracing is printed only when the DEBUG_BROWSER_LIFECYCLE
 * environment variable is exactly 'true'; the failure message in the catch
 * path is always printed.
 *
 * @param browser - Object exposing `isConnected()` and `newContext()`
 *   (presumably a Playwright Browser; the parameter is untyped here).
 * @returns `true` if the probe succeeded, `false` otherwise.
 */
private async validateBrowserHealth(browser: any): Promise<boolean> {
  const verbose = process.env.DEBUG_BROWSER_LIFECYCLE === 'true';

  // Single choke point for the optional lifecycle tracing.
  const trace = (message: string): void => {
    if (verbose) {
      console.log(message);
    }
  };

  try {
    trace(`[SearchEngine] Validating browser health...`);

    // Check if browser is still connected
    if (browser.isConnected()) {
      // Opening and closing a context exercises the browser end to end.
      const probeContext = await browser.newContext();
      await probeContext.close();

      trace(`[SearchEngine] Browser health check passed`);
      return true;
    }

    trace(`[SearchEngine] Browser is not connected`);
    return false;
  } catch (err) {
    const reason = err instanceof Error ? err.message : 'Unknown error';
    console.log(`[SearchEngine] Browser health check failed: ${reason}`);
    return false;
  }
}
1017+
1018+
/**
 * Logs a failed search attempt and reacts to "browser session closed" errors.
 *
 * When the error message matches one of the known session-closure phrases,
 * every browser held by `this.browserPool` is closed so that later callers
 * obtain fresh instances. Failures while closing the pool are logged and
 * swallowed; this method never throws for that reason.
 *
 * @param error - The value caught by the caller. `Error` instances and plain
 *   strings contribute their message; anything else is reported as
 *   'Unknown error'.
 * @param engineName - Name of the search approach that failed (log context).
 * @param attemptNumber - 1-based attempt counter used only in the log line.
 */
private async handleBrowserError(error: unknown, engineName: string, attemptNumber: number = 1): Promise<void> {
  // Keep the text of thrown strings as well as Error objects; otherwise a
  // string rejection would be flattened to 'Unknown error' and the closure
  // detection below could never match it.
  let errorMessage = 'Unknown error';
  if (error instanceof Error) {
    errorMessage = error.message;
  } else if (typeof error === 'string') {
    errorMessage = error;
  }

  console.error(`[SearchEngine] ${engineName} browser error (attempt ${attemptNumber}): ${errorMessage}`);

  // Phrases that indicate the browser/context/page went away underneath us.
  const sessionClosedMarkers = [
    'Target page, context or browser has been closed',
    'Browser has been closed',
    'Session has been closed',
  ];

  const sessionWasClosed = sessionClosedMarkers.some((marker) => errorMessage.includes(marker));
  if (!sessionWasClosed) {
    return;
  }

  console.log(`[SearchEngine] Detected browser session closure, attempting to refresh browser pool`);

  // NOTE(review): the Bing and Brave search paths launch and close their own
  // dedicated browsers, so closing the pool does not repair the failed search
  // itself. Confirm nothing else is using pooled browsers concurrently before
  // relying on this teardown.
  try {
    await this.browserPool.closeAll();
    console.log(`[SearchEngine] Browser pool refreshed for ${engineName}`);
  } catch (refreshError) {
    console.error(`[SearchEngine] Failed to refresh browser pool: ${refreshError instanceof Error ? refreshError.message : 'Unknown error'}`);
  }
}
1038+
8881039
async closeAll(): Promise<void> {
8891040
await this.browserPool.closeAll();
8901041
}

0 commit comments

Comments
 (0)